1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
use super::*;
use lazy_static::lazy_static;
use spawning::SpawningFileAdapter;
use std::io::BufReader;
use std::process::Command;
static EXTENSIONS: &[&str] = &["pdf"];
lazy_static! {
static ref METADATA: AdapterMeta = AdapterMeta {
name: "poppler".to_owned(),
version: 1,
description: "Uses pdftotext (from poppler-utils) to extract plain text from PDF files"
.to_owned(),
fast_matchers: EXTENSIONS
.iter()
.map(|s| FastMatcher::FileExtension(s.to_string()))
.collect(),
slow_matchers: None
};
}
#[derive(Default)]
pub struct PopplerAdapter;
impl PopplerAdapter {
pub fn new() -> PopplerAdapter {
PopplerAdapter
}
}
impl GetMetadata for PopplerAdapter {
fn metadata(&self) -> &AdapterMeta {
&METADATA
}
}
impl SpawningFileAdapter for PopplerAdapter {
fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Fallible<()> {
let mut page = 1;
for line in BufReader::new(inp).lines() {
let mut line = line?;
if line.contains('\x0c') {
line = line.replace('\x0c', "");
page += 1;
}
oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?;
}
Ok(())
}
fn get_exe(&self) -> &str {
"pdftotext"
}
fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command {
cmd.arg("-layout").arg("-").arg("-");
cmd
}
}