use serde::{Deserialize, Serialize};
/// Document formats that can be identified from a file extension.
///
/// See `DocumentFormat::from_extension` for the accepted spellings and
/// `DocumentFormat::extension` for the extension reported for each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DocumentFormat {
/// Matched by the extensions `md` and `markdown`; reported as `md`.
Markdown,
/// Matched by the extension `pdf`; reported as `pdf`.
Pdf,
/// Matched by the extensions `html` and `htm`; reported as `html`.
Html,
/// Matched by the extension `docx`; reported as `docx`.
Docx,
}
impl DocumentFormat {
    /// Resolves a file extension to the matching document format.
    ///
    /// Matching is case-insensitive, and a single leading `.` is ignored, so
    /// `"md"`, `"MD"` and `".md"` all resolve to [`DocumentFormat::Markdown`].
    ///
    /// Returns `None` when the extension is not recognised (including the
    /// empty string and a lone `"."`).
    pub fn from_extension(ext: &str) -> Option<Self> {
        // Extensions sliced off a file name by hand frequently keep the dot;
        // dropping at most one keeps every previously accepted input valid.
        let bare = ext.strip_prefix('.').unwrap_or(ext);
        match bare.to_lowercase().as_str() {
            "md" | "markdown" => Some(Self::Markdown),
            "pdf" => Some(Self::Pdf),
            "html" | "htm" => Some(Self::Html),
            "docx" => Some(Self::Docx),
            _ => None,
        }
    }

    /// Returns the lowercase extension for this format, without a leading dot.
    ///
    /// The returned value is always accepted by [`DocumentFormat::from_extension`]
    /// and maps back to the same variant.
    pub fn extension(&self) -> &'static str {
        match self {
            Self::Markdown => "md",
            Self::Pdf => "pdf",
            Self::Html => "html",
            Self::Docx => "docx",
        }
    }
}
/// A single node of a parsed document: a title, its text content and
/// position information.
///
/// NOTE(review): where a field's meaning is not established by code in this
/// file, the description below is inferred from its name — confirm against
/// the code that produces these nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawNode {
/// Node title; `RawNode::new` sets it from its argument.
pub title: String,
/// Text content; `has_content`, `char_count` and `word_count` inspect it.
pub content: String,
/// Nesting level; defaults to 0. Presumably the heading depth — TODO confirm.
pub level: usize,
/// First line of the node; defaults to 1. Presumably 1-based — TODO confirm.
pub line_start: usize,
/// Last line of the node; defaults to 1. Inclusive vs. exclusive is not
/// shown here — TODO confirm.
pub line_end: usize,
/// Page the node belongs to, if any; `None` by default.
pub page: Option<usize>,
/// Token count for this node, if one has been computed; `None` by default.
pub token_count: Option<usize>,
/// Aggregate token count, if one has been computed; `None` by default.
/// Presumably includes descendants — TODO confirm.
/// `#[serde(default)]` lets the field be absent from serialized input.
#[serde(default)]
pub total_token_count: Option<usize>,
}
impl Default for RawNode {
fn default() -> Self {
Self {
title: String::new(),
content: String::new(),
level: 0,
line_start: 1,
line_end: 1,
page: None,
token_count: None,
total_token_count: None,
}
}
}
impl RawNode {
    /// Creates a node with the given title; every other field takes its
    /// `Default` value.
    pub fn new(title: impl Into<String>) -> Self {
        let title = title.into();
        Self {
            title,
            ..Self::default()
        }
    }

    /// Returns the node with its content replaced by `content`.
    pub fn with_content(self, content: impl Into<String>) -> Self {
        Self {
            content: content.into(),
            ..self
        }
    }

    /// Returns the node with its level set to `level`.
    pub fn with_level(self, level: usize) -> Self {
        Self { level, ..self }
    }

    /// Returns the node with `line_start` set to `start` and `line_end` set
    /// to `end`. The two values are stored as given, without validation.
    pub fn with_lines(self, start: usize, end: usize) -> Self {
        Self {
            line_start: start,
            line_end: end,
            ..self
        }
    }

    /// Returns the node with its page set to `Some(page)`.
    pub fn with_page(self, page: usize) -> Self {
        Self {
            page: Some(page),
            ..self
        }
    }

    /// Reports whether the content holds at least one non-whitespace character.
    pub fn has_content(&self) -> bool {
        self.content.chars().any(|ch| !ch.is_whitespace())
    }

    /// Counts the Unicode scalar values (`char`s) in the content, not bytes.
    pub fn char_count(&self) -> usize {
        let text = self.content.as_str();
        text.chars().count()
    }

    /// Counts the whitespace-separated words in the content.
    pub fn word_count(&self) -> usize {
        self.content
            .split(char::is_whitespace)
            .filter(|word| !word.is_empty())
            .count()
    }
}
/// Metadata describing a document as a whole.
///
/// NOTE(review): where a field's meaning is not established by code in this
/// file, the description below is inferred from its name — confirm against
/// the code that fills this struct in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMeta {
/// Document name; empty by default.
pub name: String,
/// Format of the document; `DocumentFormat::Markdown` by default.
pub format: DocumentFormat,
/// Number of pages, if known; `None` by default.
pub page_count: Option<usize>,
/// Number of lines; 0 by default.
pub line_count: usize,
/// Path the document was read from, if any; `None` by default.
pub source_path: Option<String>,
/// Optional free-form description; `None` by default.
pub description: Option<String>,
}
impl Default for DocumentMeta {
fn default() -> Self {
Self {
name: String::new(),
format: DocumentFormat::Markdown,
page_count: None,
line_count: 0,
source_path: None,
description: None,
}
}
}
/// Pairs document-level metadata with a list of `RawNode`s.
///
/// NOTE(review): presumably the output of a document parser — confirm
/// against the code that constructs it.
#[derive(Debug, Clone)]
pub struct ParseResult {
/// Metadata for the document as a whole.
pub meta: DocumentMeta,
/// The nodes, in the order they were supplied to `ParseResult::new`.
pub nodes: Vec<RawNode>,
}
impl ParseResult {
pub fn new(meta: DocumentMeta, nodes: Vec<RawNode>) -> Self {
Self { meta, nodes }
}
pub fn node_count(&self) -> usize {
self.nodes.len()
}
pub fn is_empty(&self) -> bool {
self.nodes.is_empty()
}
}