Skip to main content

aster/media/
pdf.rs

1//! PDF 解析模块
2//!
3
4use base64::{engine::general_purpose::STANDARD, Engine};
5use std::collections::HashSet;
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::sync::LazyLock;
9
/// Maximum accepted PDF file size in bytes (32 MiB).
pub const PDF_MAX_SIZE: u64 = 32 * 1024 * 1024;
12
/// Lowercase file extensions (without the leading dot) recognised as PDF.
///
/// The set is built lazily on first access.
pub static PDF_EXTENSIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    let mut extensions = HashSet::new();
    extensions.insert("pdf");
    extensions
});
16
/// Result of reading a PDF file from disk.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly, e.g. in
/// assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PdfReadResult {
    /// Path the file was read from.
    pub file_path: PathBuf,
    /// File contents encoded as a base64 string.
    pub base64: String,
    /// Size of the file in bytes.
    pub original_size: u64,
}
24
/// Reports whether PDF support is enabled.
///
/// Support is controlled by the `ASTER_PDF_SUPPORT` environment variable:
/// it is disabled only when the variable is set to exactly `"false"`
/// (case-sensitive). If the variable is unset, cannot be read as Unicode,
/// or holds any other value, support is considered enabled.
pub fn is_pdf_supported() -> bool {
    match std::env::var("ASTER_PDF_SUPPORT") {
        Ok(value) => value != "false",
        Err(_) => true,
    }
}
31
32/// 验证文件扩展名是否为 PDF
33pub fn is_pdf_extension(ext: &str) -> bool {
34    let normalized = ext.strip_prefix('.').unwrap_or(ext).to_lowercase();
35    PDF_EXTENSIONS.contains(normalized.as_str())
36}
37
/// Formats a byte count as a human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); values below
/// 1 MiB are printed in `KB` and everything else in `MB`, both with two
/// decimal places.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * 1024;

    match bytes {
        b if b < KIB => format!("{} B", b),
        b if b < MIB => format!("{:.2} KB", b as f64 / KIB as f64),
        b => format!("{:.2} MB", b as f64 / MIB as f64),
    }
}
48
49/// 读取 PDF 文件并返回 base64
50pub fn read_pdf_file(file_path: &Path) -> Result<PdfReadResult, String> {
51    let metadata =
52        fs::metadata(file_path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
53
54    let size = metadata.len();
55
56    if size == 0 {
57        return Err(format!("PDF file is empty: {}", file_path.display()));
58    }
59
60    if size > PDF_MAX_SIZE {
61        return Err(format!(
62            "PDF file size ({}) exceeds maximum allowed size ({}). PDF files must be less than 32MB.",
63            format_bytes(size),
64            format_bytes(PDF_MAX_SIZE)
65        ));
66    }
67
68    let buffer = fs::read(file_path).map_err(|e| format!("Failed to read file: {}", e))?;
69
70    let base64 = STANDARD.encode(&buffer);
71
72    Ok(PdfReadResult {
73        file_path: file_path.to_path_buf(),
74        base64,
75        original_size: size,
76    })
77}
78
79/// 验证 PDF 文件是否有效
80pub fn validate_pdf_file(file_path: &Path) -> Result<(), String> {
81    if !file_path.exists() {
82        return Err("File does not exist".to_string());
83    }
84
85    let metadata =
86        fs::metadata(file_path).map_err(|e| format!("Failed to read metadata: {}", e))?;
87
88    let size = metadata.len();
89
90    if size == 0 {
91        return Err("PDF file is empty".to_string());
92    }
93
94    if size > PDF_MAX_SIZE {
95        return Err(format!(
96            "PDF file size ({}) exceeds maximum allowed size ({})",
97            format_bytes(size),
98            format_bytes(PDF_MAX_SIZE)
99        ));
100    }
101
102    // 验证文件头(PDF 文件应以 %PDF- 开头)
103    let buffer = fs::read(file_path).map_err(|e| format!("Failed to read file: {}", e))?;
104
105    if buffer.len() >= 5 {
106        let header = std::str::from_utf8(&buffer[..5]).unwrap_or("");
107        if !header.starts_with("%PDF-") {
108            return Err("File is not a valid PDF (invalid header)".to_string());
109        }
110    }
111
112    Ok(())
113}