use super::{AtomicToken, FileReaderImpl, Image};
use crate::chunk::ChunkExtraInfo;
use crate::error::Error;
use crate::index::BuildConfig;
use mupdf::{Colorspace, Document, ImageFormat, Matrix};
use ragit_pdl::ImageType;
/// Reader that turns the pages of a PDF document into image tokens.
///
/// Pages are rendered in batches; `cursor` tracks how far rendering has
/// progressed and `images` buffers the rendered pages until they are popped.
pub struct PdfReader {
    /// Path of the PDF file; the document is reopened from it on later loads.
    path: String,
    /// Number of pages reported by the document (never negative).
    page_count: usize,
    /// Zero-based index of the next page to render.
    cursor: usize,
    /// Rendered pages that have not been popped yet, each paired with its
    /// one-based page number.
    images: Vec<(AtomicToken, usize)>,
}
impl FileReaderImpl for PdfReader {
fn new(path: &str, _root_dir: &str, _config: &BuildConfig) -> Result<Self, Error> {
let document = Document::open(path)?;
let page_count = document.page_count()?.max(0) as usize;
let mut result = PdfReader {
images: vec![],
path: path.to_string(),
page_count,
cursor: 0,
};
if result.cursor < result.page_count {
for _ in 0..64 {
result.images.push((convert_page(&document, result.cursor as i32)?, result.cursor + 1));
result.cursor += 1;
if result.cursor >= result.page_count {
break;
}
}
}
Ok(result)
}
fn load_tokens(&mut self) -> Result<(), Error> {
if self.cursor < self.page_count {
let document = Document::open(&self.path)?;
for _ in 0..64 {
self.images.push((convert_page(&document, self.cursor as i32)?, self.cursor + 1));
self.cursor += 1;
if self.cursor >= self.page_count {
break;
}
}
}
Ok(())
}
fn pop_all_tokens(&mut self) -> Result<Vec<AtomicToken>, Error> {
let mut result = Vec::with_capacity(self.images.len());
for (image, page_no) in self.images.iter() {
result.push(image.clone());
result.push(AtomicToken::ChunkExtraInfo(ChunkExtraInfo { page_no: Some(*page_no) }));
result.push(AtomicToken::PageBreak);
}
self.images = vec![];
Ok(result)
}
fn has_more_to_read(&self) -> bool {
self.cursor < self.page_count || !self.images.is_empty()
}
fn key(&self) -> String {
String::from("pdf_reader_v1")
}
}
fn convert_page(
document: &Document,
page: i32,
) -> Result<AtomicToken, Error> {
let page = document.load_page(page)?;
let bounds = page.bounds()?;
let width = bounds.x1 - bounds.x0;
let height = bounds.y1 - bounds.y0;
let zoom = 1024.0 / width.max(height).max(0.1);
let colorspace = Colorspace::device_rgb();
let matrix = Matrix::new_scale(zoom, zoom);
let mut pixmap = page.to_pixmap(&matrix, &colorspace, false, false)?;
let (pixmap_width, pixmap_height) = pixmap.resolution();
pixmap.set_resolution(
(pixmap_width as f32 * zoom) as i32,
(pixmap_height as f32 * zoom) as i32,
);
let mut bytes = vec![];
pixmap.write_to(
&mut bytes,
ImageFormat::PNG,
)?;
Ok(AtomicToken::Image(Image::new(bytes, ImageType::Png)?))
}