use crate::error::Pdf2MdError;
use std::path::{Path, PathBuf};
use tempfile::TempDir;
use tracing::{debug, info};
/// A PDF input after resolution: either a path the caller supplied, or a file
/// that was downloaded from a URL into a temporary directory.
pub enum ResolvedInput {
    /// A file on the local filesystem, referenced by the path the caller gave.
    Local(PathBuf),
    /// A file fetched from a URL.
    Downloaded {
        /// Location of the downloaded file inside the temporary directory.
        path: PathBuf,
        /// Handle for the temporary directory that contains `path`.
        ///
        /// The field is never read (hence the leading underscore); it is kept
        /// so the directory lives as long as this value. `tempfile::TempDir`
        /// removes its directory when dropped.
        _temp_dir: TempDir,
    },
}
impl ResolvedInput {
pub fn path(&self) -> &Path {
match self {
ResolvedInput::Local(p) => p,
ResolvedInput::Downloaded { path, .. } => path,
}
}
}
/// Returns `true` when `input` starts with an `http://` or `https://` scheme.
///
/// The scheme is matched ASCII-case-insensitively (URI schemes are
/// case-insensitive), so `HTTPS://host/doc.pdf` is recognised as a URL instead
/// of being treated as a local path. Everything after the prefix is ignored.
pub fn is_url(input: &str) -> bool {
    const SCHEME_PREFIXES: [&str; 2] = ["http://", "https://"];

    SCHEME_PREFIXES.iter().any(|prefix| {
        // `get` yields `None` when the input is shorter than the prefix or the
        // cut would land inside a multi-byte character, so this never panics.
        input
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    })
}
/// Resolves a user-supplied input string to a PDF on disk.
///
/// Inputs that [`is_url`] accepts are downloaded with a request timeout of
/// `timeout_secs` seconds; anything else is treated as a local path.
///
/// # Errors
///
/// Returns whatever error the chosen resolution path reports.
pub async fn resolve_input(input: &str, timeout_secs: u64) -> Result<ResolvedInput, Pdf2MdError> {
    // Local paths need no network work, so handle them first and bail out.
    if !is_url(input) {
        return resolve_local(input);
    }
    download_url(input, timeout_secs).await
}
/// Validates a local path and wraps it in [`ResolvedInput::Local`].
///
/// # Errors
///
/// * `FileNotFound` — the path does not exist, or opening it fails for any
///   reason other than a permission error.
/// * `PermissionDenied` — opening the file fails with a permission error.
/// * `NotAPdf` — the first four bytes were read and are not `%PDF`.
///
/// If the first four bytes cannot be read at all (for example, the file is
/// shorter than four bytes), the magic check is skipped and the path is
/// accepted.
fn resolve_local(path_str: &str) -> Result<ResolvedInput, Pdf2MdError> {
    use std::io::{ErrorKind, Read};

    let path = PathBuf::from(path_str);
    if !path.exists() {
        return Err(Pdf2MdError::FileNotFound { path });
    }

    let mut file = match std::fs::File::open(&path) {
        Ok(handle) => handle,
        Err(open_err) => {
            // Only a permission failure gets its own variant; every other
            // open failure is reported as a missing file.
            return Err(if open_err.kind() == ErrorKind::PermissionDenied {
                Pdf2MdError::PermissionDenied { path }
            } else {
                Pdf2MdError::FileNotFound { path }
            });
        }
    };

    let mut magic = [0u8; 4];
    let header_read = file.read_exact(&mut magic).is_ok();
    // The handle is only needed for the header probe.
    drop(file);

    if header_read && magic != *b"%PDF" {
        return Err(Pdf2MdError::NotAPdf { path, magic });
    }

    debug!("Resolved local PDF: {}", path.display());
    Ok(ResolvedInput::Local(path))
}
async fn download_url(url: &str, timeout_secs: u64) -> Result<ResolvedInput, Pdf2MdError> {
info!("Downloading PDF from: {}", url);
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(timeout_secs))
.build()
.map_err(|e| Pdf2MdError::DownloadFailed {
url: url.to_string(),
reason: e.to_string(),
})?;
let response = client.get(url).send().await.map_err(|e| {
if e.is_timeout() {
Pdf2MdError::DownloadTimeout {
url: url.to_string(),
secs: timeout_secs,
}
} else {
Pdf2MdError::DownloadFailed {
url: url.to_string(),
reason: e.to_string(),
}
}
})?;
if !response.status().is_success() {
return Err(Pdf2MdError::DownloadFailed {
url: url.to_string(),
reason: format!("HTTP {}", response.status()),
});
}
let filename = extract_filename(url, &response);
let temp_dir = TempDir::new().map_err(|e| Pdf2MdError::Internal(e.to_string()))?;
let file_path = temp_dir.path().join(&filename);
let bytes = response
.bytes()
.await
.map_err(|e| Pdf2MdError::DownloadFailed {
url: url.to_string(),
reason: e.to_string(),
})?;
tokio::fs::write(&file_path, &bytes)
.await
.map_err(|e| Pdf2MdError::Internal(format!("Failed to write temp file: {}", e)))?;
if bytes.len() >= 4 && &bytes[..4] != b"%PDF" {
let mut magic = [0u8; 4];
magic.copy_from_slice(&bytes[..4]);
return Err(Pdf2MdError::NotAPdf {
path: file_path,
magic,
});
}
info!("Downloaded to: {}", file_path.display());
Ok(ResolvedInput::Downloaded {
path: file_path,
_temp_dir: temp_dir,
})
}
/// Chooses a file name for a download from the last path segment of `url`.
///
/// Falls back to `"downloaded.pdf"` when the URL does not parse, has no path
/// segments, or its last segment is empty or contains no `.`.
/// `_response` is accepted but currently unused.
fn extract_filename(url: &str, _response: &reqwest::Response) -> String {
    reqwest::Url::parse(url)
        .ok()
        .and_then(|parsed| {
            parsed
                .path_segments()
                .and_then(|mut segments| segments.next_back())
                // Require something that looks like `name.ext`.
                .filter(|candidate| !candidate.is_empty() && candidate.contains('.'))
                .map(|candidate| candidate.to_string())
        })
        .unwrap_or_else(|| "downloaded.pdf".to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_url` accepts http(s) URLs and rejects paths and the empty string.
    #[test]
    fn test_is_url() {
        for &accepted in &["https://example.com/doc.pdf", "http://example.com/doc.pdf"] {
            assert!(is_url(accepted));
        }
        for &rejected in &["/tmp/doc.pdf", "doc.pdf", ""] {
            assert!(!is_url(rejected));
        }
    }

    /// Page selections map to the expected page indices for a
    /// five-page document.
    #[test]
    fn test_page_selection_to_indices() {
        use crate::config::PageSelection;

        let total_pages = 5;
        let cases = vec![
            (PageSelection::All, vec![0, 1, 2, 3, 4]),
            (PageSelection::Single(3), vec![2]),
            // A page past the end selects nothing.
            (PageSelection::Single(6), Vec::<usize>::new()),
            (PageSelection::Range(2, 4), vec![1, 2, 3]),
            (PageSelection::Set(vec![1, 3, 5]), vec![0, 2, 4]),
            // Unordered input with a duplicate is expected back sorted and unique.
            (PageSelection::Set(vec![3, 1, 3]), vec![0, 2]),
        ];

        for (selection, expected) in cases {
            assert_eq!(selection.to_indices(total_pages), expected);
        }
    }
}