use super::backend::Backend;
use super::postprocess::PostProcessorConfig;
use super::router::RouterConfig;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use std::time::Duration;
/// Top-level PDF configuration: router and post-processor settings plus the
/// paths, timeouts and worker limits stored alongside them.
///
/// Construct it with `PdfConfig::default()` or through `PdfConfig::builder()`,
/// and call `validate` to check the numeric fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PdfConfig {
/// Router settings. The builder writes its `default_backend`,
/// `math_density_threshold` and `parallel_pages` fields.
pub router: RouterConfig,
/// Post-processor settings. The builder writes its `validate_latex` and
/// `normalize` fields.
pub postprocess: PostProcessorConfig,
/// Path of the Python interpreter; the default is the bare name `python3`.
/// NOTE(review): how a relative name is resolved depends on the code that
/// launches it, which is not visible here.
pub python_path: PathBuf,
/// Optional Marker-related path (`None` by default).
/// NOTE(review): whether this points at an executable or a directory is
/// not visible in this file - confirm against the consumer.
pub marker_path: Option<PathBuf>,
/// Optional Nougat-related path (`None` by default).
/// NOTE(review): same caveat as `marker_path`.
pub nougat_path: Option<PathBuf>,
/// Temporary-directory override. When `None`, `effective_temp_dir` falls
/// back to `libgrammstein-pdf` under the system temp directory.
pub temp_dir: Option<PathBuf>,
/// Page-level timeout (presumably the limit for one page - confirm).
/// Defaults to 60 seconds; `validate` rejects a zero duration.
pub page_timeout: Duration,
/// Document-level timeout (presumably the limit for a whole document -
/// confirm). Defaults to 3600 seconds; `validate` rejects a zero duration.
pub document_timeout: Duration,
/// Worker count for batch work. Defaults to the available parallelism
/// reported by the standard library (4 if that query fails); `validate`
/// requires at least 1.
pub batch_workers: usize,
/// Defaults to `false`. Presumably controls whether temporary files are
/// retained after processing - confirm against the consumer.
pub keep_temp_files: bool,
/// Optional per-worker memory limit; the builder names this value `bytes`.
/// `None` by default. Not checked by `validate`.
pub max_memory_per_worker: Option<usize>,
/// Device identifier, `"cpu"` by default. The string is not validated in
/// this file.
pub device: String,
}
impl Default for PdfConfig {
fn default() -> Self {
Self {
router: RouterConfig::default(),
postprocess: PostProcessorConfig::default(),
python_path: PathBuf::from("python3"),
marker_path: None,
nougat_path: None,
temp_dir: None,
page_timeout: Duration::from_secs(60),
document_timeout: Duration::from_secs(3600),
batch_workers: num_cpus(),
keep_temp_files: false,
max_memory_per_worker: None,
device: "cpu".to_string(),
}
}
}
impl PdfConfig {
    /// Returns a [`PdfConfigBuilder`] that starts from the default configuration.
    pub fn builder() -> PdfConfigBuilder {
        PdfConfigBuilder::new()
    }

    /// Checks the numeric settings for values that can never be valid.
    ///
    /// Checks run in a fixed order and stop at the first failure.
    ///
    /// # Errors
    ///
    /// Returns a message describing the first violated rule:
    /// - `page_timeout` is zero,
    /// - `document_timeout` is zero,
    /// - `batch_workers` is zero,
    /// - `router.math_density_threshold` is outside `0.0..=1.0` or is NaN.
    pub fn validate(&self) -> Result<(), String> {
        if self.page_timeout.is_zero() {
            return Err("page_timeout must be positive".into());
        }
        if self.document_timeout.is_zero() {
            return Err("document_timeout must be positive".into());
        }
        if self.batch_workers == 0 {
            return Err("batch_workers must be at least 1".into());
        }
        // A negated range test rather than two `<` / `>` comparisons: every
        // ordered comparison with NaN is false, so NaN must be caught by
        // "not inside the range" instead of "below or above it".
        if !(0.0..=1.0).contains(&self.router.math_density_threshold) {
            return Err("math_density_threshold must be between 0.0 and 1.0".into());
        }
        Ok(())
    }

    /// Returns the directory to use for temporary files.
    ///
    /// This is a clone of `temp_dir` when one is configured; otherwise it is
    /// `libgrammstein-pdf` inside the system temp directory. The directory is
    /// only named here, not created.
    pub fn effective_temp_dir(&self) -> PathBuf {
        self.temp_dir
            .clone()
            .unwrap_or_else(|| std::env::temp_dir().join("libgrammstein-pdf"))
    }
}
/// Chainable builder for [`PdfConfig`].
///
/// Both `new` and the derived `Default` start from `PdfConfig::default()`.
/// Each setter consumes the builder and returns it with one field replaced.
#[derive(Debug, Clone, Default)]
pub struct PdfConfigBuilder {
// The configuration being assembled; handed out by `build`.
config: PdfConfig,
}
impl PdfConfigBuilder {
    /// Creates a builder seeded with `PdfConfig::default()`.
    pub fn new() -> Self {
        let config = PdfConfig::default();
        Self { config }
    }

    /// Sets `router.default_backend`.
    pub fn backend(self, backend: Backend) -> Self {
        let mut config = self.config;
        config.router.default_backend = backend;
        Self { config }
    }

    /// Sets `router.math_density_threshold`.
    ///
    /// The value is stored as given; range checking happens in
    /// `PdfConfig::validate`.
    pub fn math_density_threshold(self, threshold: f32) -> Self {
        let mut config = self.config;
        config.router.math_density_threshold = threshold;
        Self { config }
    }

    /// Sets `router.parallel_pages`.
    pub fn parallel_pages(self, enabled: bool) -> Self {
        let mut config = self.config;
        config.router.parallel_pages = enabled;
        Self { config }
    }

    /// Replaces the Python interpreter path.
    pub fn python_path(self, path: impl Into<PathBuf>) -> Self {
        let mut config = self.config;
        config.python_path = path.into();
        Self { config }
    }

    /// Stores `path` as `Some(..)` in `marker_path`.
    pub fn marker_path(self, path: impl Into<PathBuf>) -> Self {
        let mut config = self.config;
        config.marker_path = Some(path.into());
        Self { config }
    }

    /// Stores `path` as `Some(..)` in `nougat_path`.
    pub fn nougat_path(self, path: impl Into<PathBuf>) -> Self {
        let mut config = self.config;
        config.nougat_path = Some(path.into());
        Self { config }
    }

    /// Overrides the temporary directory (stored as `Some(..)`).
    pub fn temp_dir(self, path: impl Into<PathBuf>) -> Self {
        let mut config = self.config;
        config.temp_dir = Some(path.into());
        Self { config }
    }

    /// Sets `page_timeout`.
    pub fn page_timeout(self, timeout: Duration) -> Self {
        let mut config = self.config;
        config.page_timeout = timeout;
        Self { config }
    }

    /// Sets `document_timeout`.
    pub fn document_timeout(self, timeout: Duration) -> Self {
        let mut config = self.config;
        config.document_timeout = timeout;
        Self { config }
    }

    /// Sets `batch_workers`.
    ///
    /// Zero is accepted here and only rejected by `PdfConfig::validate`.
    pub fn batch_workers(self, workers: usize) -> Self {
        let mut config = self.config;
        config.batch_workers = workers;
        Self { config }
    }

    /// Sets `keep_temp_files`.
    pub fn keep_temp_files(self, keep: bool) -> Self {
        let mut config = self.config;
        config.keep_temp_files = keep;
        Self { config }
    }

    /// Sets `max_memory_per_worker` to `Some(bytes)`.
    pub fn max_memory_per_worker(self, bytes: usize) -> Self {
        let mut config = self.config;
        config.max_memory_per_worker = Some(bytes);
        Self { config }
    }

    /// Replaces the device string.
    pub fn device(self, device: impl Into<String>) -> Self {
        let mut config = self.config;
        config.device = device.into();
        Self { config }
    }

    /// Sets `postprocess.validate_latex`.
    pub fn validate_latex(self, enabled: bool) -> Self {
        let mut config = self.config;
        config.postprocess.validate_latex = enabled;
        Self { config }
    }

    /// Sets `postprocess.normalize`.
    pub fn normalize_output(self, enabled: bool) -> Self {
        let mut config = self.config;
        config.postprocess.normalize = enabled;
        Self { config }
    }

    /// Finishes the builder and returns the configuration without validating it.
    pub fn build(self) -> PdfConfig {
        let Self { config } = self;
        config
    }

    /// Finishes the builder and runs `PdfConfig::validate` on the result.
    ///
    /// # Errors
    ///
    /// Returns the validation message unchanged when the configuration is
    /// rejected.
    pub fn build_validated(self) -> Result<PdfConfig, String> {
        let built = self.build();
        match built.validate() {
            Ok(()) => Ok(built),
            Err(reason) => Err(reason),
        }
    }
}
/// Returns the parallelism reported by the standard library, or 4 when the
/// query fails.
fn num_cpus() -> usize {
    // Used only when the platform cannot report its parallelism.
    const FALLBACK_WORKERS: usize = 4;

    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => FALLBACK_WORKERS,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must pass its own validation.
    #[test]
    fn test_default_config() {
        let outcome = PdfConfig::default().validate();
        assert_eq!(outcome, Ok(()));
    }

    /// Values passed to the builder must land in the matching fields.
    #[test]
    fn test_builder() {
        let page_limit = Duration::from_secs(120);
        let worker_count = 8;

        let built = PdfConfigBuilder::new()
            .backend(Backend::Nougat)
            .math_density_threshold(0.5)
            .page_timeout(page_limit)
            .batch_workers(worker_count)
            .build();

        let router = &built.router;
        assert_eq!(router.default_backend, Backend::Nougat);
        assert_eq!(router.math_density_threshold, 0.5);
        assert_eq!(built.page_timeout, page_limit);
        assert_eq!(built.batch_workers, worker_count);
    }

    /// A threshold above 1.0 must be rejected by `validate`.
    #[test]
    fn test_validation() {
        let out_of_range = PdfConfigBuilder::new()
            .math_density_threshold(1.5)
            .build();

        let outcome = out_of_range.validate();
        assert!(outcome.is_err());
    }
}