use crate::error::Pdf2MdError;
use crate::progress::ConversionProgressCallback;
use edgequake_llm::LLMProvider;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::sync::Arc;
/// Configuration for a conversion run.
///
/// All fields are public. `ConversionConfig::builder()` offers a fluent way to
/// set them (clamping a few numeric values), and the `Default` impl supplies
/// the baseline values quoted on the fields below.
#[derive(Clone)]
pub struct ConversionConfig {
/// DPI setting. Default 150; the builder clamps it to 72–400.
/// NOTE(review): presumably the page rendering resolution — confirm with the renderer.
pub dpi: u32,
/// Pixel limit for rendered output. Default 2000; the builder enforces a minimum of 100.
/// NOTE(review): whether this bounds a side length or a total is not visible here.
pub max_rendered_pixels: u32,
/// Concurrency level. Default 10; the builder raises 0 to 1.
pub concurrency: usize,
/// Optional model name; unset by default.
pub model: Option<String>,
/// Optional provider name; unset by default.
pub provider_name: Option<String>,
/// Optional provider instance shared through an `Arc`; unset by default.
pub provider: Option<Arc<dyn LLMProvider>>,
/// Temperature value. Default 0.1; the builder clamps it to 0.0–2.0.
pub temperature: f32,
/// Token limit. Default 4096.
pub max_tokens: usize,
/// Retry count. Default 3.
pub max_retries: u32,
/// Retry backoff, in milliseconds going by the field name. Default 500.
pub retry_backoff_ms: u64,
/// Optional password; unset by default. Its value is never shown in `Debug` output.
pub password: Option<String>,
/// Optional system prompt; unset by default.
pub system_prompt: Option<String>,
/// Format-maintenance flag. Default `false`.
pub maintain_format: bool,
/// Fidelity tier. Defaults to `FidelityTier::default()` (`Tier2`).
pub fidelity: FidelityTier,
/// Page selection. Defaults to `PageSelection::default()` (`All`).
pub pages: PageSelection,
/// Separator rendered between pages. Defaults to `PageSeparator::default()` (`None`).
pub page_separator: PageSeparator,
/// Metadata inclusion flag. Default `false`.
pub include_metadata: bool,
/// Download timeout, in seconds going by the field name. Default 120.
pub download_timeout_secs: u64,
/// API timeout, in seconds going by the field name. Default 60.
pub api_timeout_secs: u64,
/// Optional progress callback shared through an `Arc`; unset by default.
pub progress_callback: Option<Arc<dyn ConversionProgressCallback>>,
}
impl Default for ConversionConfig {
fn default() -> Self {
Self {
dpi: 150,
max_rendered_pixels: 2000,
concurrency: 10,
model: None,
provider_name: None,
provider: None,
temperature: 0.1,
max_tokens: 4096,
max_retries: 3,
retry_backoff_ms: 500,
password: None,
system_prompt: None,
maintain_format: false,
fidelity: FidelityTier::default(),
pages: PageSelection::default(),
page_separator: PageSeparator::default(),
include_metadata: false,
download_timeout_secs: 120,
api_timeout_secs: 60,
progress_callback: None,
}
}
}
impl fmt::Debug for ConversionConfig {
    /// Formats the configuration for diagnostics.
    ///
    /// Every field is listed in declaration order. The two trait-object
    /// fields are shown only as presence markers, and the password is
    /// replaced by a `<redacted>` marker so its value never reaches logs.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Presence-only placeholders: the trait objects carry no `Debug`
        // bound here, and the password must not be printed verbatim.
        let provider = self.provider.as_ref().map(|_| "<dyn LLMProvider>");
        let password = self.password.as_ref().map(|_| "<redacted>");
        let progress_callback = self.progress_callback.as_ref().map(|_| "<callback>");

        f.debug_struct("ConversionConfig")
            .field("dpi", &self.dpi)
            .field("max_rendered_pixels", &self.max_rendered_pixels)
            .field("concurrency", &self.concurrency)
            .field("model", &self.model)
            .field("provider_name", &self.provider_name)
            .field("provider", &provider)
            .field("temperature", &self.temperature)
            .field("max_tokens", &self.max_tokens)
            .field("max_retries", &self.max_retries)
            .field("retry_backoff_ms", &self.retry_backoff_ms)
            .field("password", &password)
            .field("system_prompt", &self.system_prompt)
            .field("maintain_format", &self.maintain_format)
            .field("fidelity", &self.fidelity)
            .field("pages", &self.pages)
            .field("page_separator", &self.page_separator)
            .field("include_metadata", &self.include_metadata)
            .field("download_timeout_secs", &self.download_timeout_secs)
            .field("api_timeout_secs", &self.api_timeout_secs)
            .field("progress_callback", &progress_callback)
            .finish()
    }
}
impl ConversionConfig {
pub fn builder() -> ConversionConfigBuilder {
ConversionConfigBuilder {
config: Self::default(),
}
}
}
/// Fluent builder for `ConversionConfig`, obtained from `ConversionConfig::builder()`.
///
/// Each setter consumes the builder and returns it, so calls can be chained;
/// `build` runs the final checks and hands back the finished configuration.
#[derive(Debug)]
pub struct ConversionConfigBuilder {
/// The configuration under construction, seeded from `ConversionConfig::default()`.
config: ConversionConfig,
}
impl ConversionConfigBuilder {
    /// Lowest DPI accepted by [`Self::dpi`] and [`Self::build`].
    const MIN_DPI: u32 = 72;
    /// Highest DPI accepted by [`Self::dpi`] and [`Self::build`].
    const MAX_DPI: u32 = 400;
    /// Smallest value [`Self::max_rendered_pixels`] will store.
    const MIN_RENDERED_PIXELS: u32 = 100;
    /// Lower temperature bound used by [`Self::temperature`] and [`Self::build`].
    const MIN_TEMPERATURE: f32 = 0.0;
    /// Upper temperature bound used by [`Self::temperature`] and [`Self::build`].
    const MAX_TEMPERATURE: f32 = 2.0;

    /// Sets the DPI, clamped to 72–400.
    pub fn dpi(mut self, dpi: u32) -> Self {
        self.config.dpi = dpi.clamp(Self::MIN_DPI, Self::MAX_DPI);
        self
    }

    /// Sets the rendered-pixel limit; values below 100 are raised to 100.
    pub fn max_rendered_pixels(mut self, px: u32) -> Self {
        self.config.max_rendered_pixels = px.max(Self::MIN_RENDERED_PIXELS);
        self
    }

    /// Sets the concurrency level; 0 is raised to 1.
    pub fn concurrency(mut self, n: usize) -> Self {
        self.config.concurrency = n.max(1);
        self
    }

    /// Sets the model name.
    pub fn model(mut self, model: impl Into<String>) -> Self {
        self.config.model = Some(model.into());
        self
    }

    /// Sets the provider name.
    pub fn provider_name(mut self, name: impl Into<String>) -> Self {
        self.config.provider_name = Some(name.into());
        self
    }

    /// Supplies a provider instance.
    pub fn provider(mut self, provider: Arc<dyn LLMProvider>) -> Self {
        self.config.provider = Some(provider);
        self
    }

    /// Supplies a progress callback.
    pub fn progress_callback(mut self, cb: Arc<dyn ConversionProgressCallback>) -> Self {
        self.config.progress_callback = Some(cb);
        self
    }

    /// Sets the temperature, clamped to 0.0–2.0.
    ///
    /// `f32::clamp` leaves NaN untouched, so a NaN argument is stored as-is
    /// here and is rejected later by [`Self::build`].
    pub fn temperature(mut self, t: f32) -> Self {
        self.config.temperature = t.clamp(Self::MIN_TEMPERATURE, Self::MAX_TEMPERATURE);
        self
    }

    /// Sets the token limit.
    pub fn max_tokens(mut self, n: usize) -> Self {
        self.config.max_tokens = n;
        self
    }

    /// Sets the retry count.
    pub fn max_retries(mut self, n: u32) -> Self {
        self.config.max_retries = n;
        self
    }

    /// Sets the retry backoff value.
    pub fn retry_backoff_ms(mut self, ms: u64) -> Self {
        self.config.retry_backoff_ms = ms;
        self
    }

    /// Sets the password.
    pub fn password(mut self, pwd: impl Into<String>) -> Self {
        self.config.password = Some(pwd.into());
        self
    }

    /// Sets the system prompt.
    pub fn system_prompt(mut self, prompt: impl Into<String>) -> Self {
        self.config.system_prompt = Some(prompt.into());
        self
    }

    /// Sets the format-maintenance flag.
    pub fn maintain_format(mut self, v: bool) -> Self {
        self.config.maintain_format = v;
        self
    }

    /// Sets the fidelity tier.
    pub fn fidelity(mut self, tier: FidelityTier) -> Self {
        self.config.fidelity = tier;
        self
    }

    /// Sets the page selection.
    pub fn pages(mut self, selection: PageSelection) -> Self {
        self.config.pages = selection;
        self
    }

    /// Sets the page separator.
    pub fn page_separator(mut self, sep: PageSeparator) -> Self {
        self.config.page_separator = sep;
        self
    }

    /// Sets the metadata inclusion flag.
    pub fn include_metadata(mut self, v: bool) -> Self {
        self.config.include_metadata = v;
        self
    }

    /// Sets the download timeout value.
    pub fn download_timeout_secs(mut self, secs: u64) -> Self {
        self.config.download_timeout_secs = secs;
        self
    }

    /// Sets the API timeout value.
    pub fn api_timeout_secs(mut self, secs: u64) -> Self {
        self.config.api_timeout_secs = secs;
        self
    }

    /// Validates the assembled configuration and returns it.
    ///
    /// # Errors
    ///
    /// Returns `Pdf2MdError::InvalidConfig` when the DPI is outside 72–400,
    /// the concurrency is 0, or the temperature is not a number within
    /// 0.0–2.0 (NaN is the one value the setter's clamp lets through).
    pub fn build(self) -> Result<ConversionConfig, Pdf2MdError> {
        let c = &self.config;
        if !(Self::MIN_DPI..=Self::MAX_DPI).contains(&c.dpi) {
            return Err(Pdf2MdError::InvalidConfig(format!(
                "DPI must be {}–{}, got {}",
                Self::MIN_DPI,
                Self::MAX_DPI,
                c.dpi
            )));
        }
        if c.concurrency == 0 {
            return Err(Pdf2MdError::InvalidConfig("Concurrency must be ≥ 1".into()));
        }
        // A range check is false for NaN, so this also catches the NaN that
        // `temperature` cannot clamp away.
        if !(Self::MIN_TEMPERATURE..=Self::MAX_TEMPERATURE).contains(&c.temperature) {
            return Err(Pdf2MdError::InvalidConfig(format!(
                "Temperature must be a number in 0.0–2.0, got {}",
                c.temperature
            )));
        }
        Ok(self.config)
    }
}
/// Fidelity level selected for a conversion; `Tier2` is the default.
///
/// NOTE(review): what each tier changes is decided by code that consumes this
/// enum — confirm there before relying on any ordering between the tiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum FidelityTier {
/// First tier.
Tier1,
/// Second tier; used when no tier is chosen explicitly.
#[default]
Tier2,
/// Third tier.
Tier3,
}
/// Which pages to process, expressed with 1-based page numbers.
///
/// `to_indices` turns a selection into zero-based indices for a document of a
/// given length.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub enum PageSelection {
/// Every page (the default).
#[default]
All,
/// One page by 1-based number; `to_indices` yields nothing when it is 0 or
/// past the last page.
Single(usize),
/// Pages `start` through `end` inclusive (1-based). `to_indices` treats a
/// start of 0 like 1 and caps `end` at the page count.
Range(usize, usize),
/// An explicit list of 1-based page numbers. `to_indices` drops entries
/// that are 0 or past the last page, then sorts and de-duplicates the rest.
Set(Vec<usize>),
}
impl PageSelection {
pub fn to_indices(&self, total_pages: usize) -> Vec<usize> {
let mut indices: Vec<usize> = match self {
PageSelection::All => (0..total_pages).collect(),
PageSelection::Single(p) => {
if *p >= 1 && *p <= total_pages {
vec![p - 1]
} else {
vec![]
}
}
PageSelection::Range(start, end) => {
let s = (*start).max(1) - 1;
let e = (*end).min(total_pages);
(s..e).collect()
}
PageSelection::Set(pages) => pages
.iter()
.filter(|&&p| p >= 1 && p <= total_pages)
.map(|p| p - 1)
.collect(),
};
indices.sort_unstable();
indices.dedup();
indices
}
}
/// Text placed between pages in the output; `render` produces the actual string.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub enum PageSeparator {
/// No visible marker (the default); `render` returns just two newlines.
#[default]
None,
/// A `---` line surrounded by blank lines.
HorizontalRule,
/// An HTML comment of the form `<!-- page N -->` surrounded by blank lines.
Comment,
/// Caller-supplied text surrounded by blank lines.
Custom(String),
}
impl PageSeparator {
pub fn render(&self, page_num: usize) -> String {
match self {
PageSeparator::None => "\n\n".to_string(),
PageSeparator::HorizontalRule => "\n\n---\n\n".to_string(),
PageSeparator::Comment => format!("\n\n<!-- page {} -->\n\n", page_num),
PageSeparator::Custom(s) => format!("\n\n{}\n\n", s),
}
}
}