1use pdf_extract::content::{Content, Operation};
2use pdf_extract::{dictionary, Document, Object, ObjectId, Stream};
3
4pub struct Pdf {
5 version: f32,
6 text: String,
7 document: Document,
8 filename: String,
9 pub font_id: ObjectId,
10 pub resources_id: ObjectId,
11 pub pages_id: ObjectId,
12 pub content_id: ObjectId,
13}
14
15impl Pdf {
16 pub fn read(filename: &str) -> Result<Self, String> {
17 let bytes = match std::fs::read(filename) {
18 Ok(e) => e,
19 Err(e) => return Err(format!("Failed to open {}", e)),
20 };
21 let document = Document::load(filename).unwrap();
22 let out = pdf_extract::extract_text_from_mem(&bytes).unwrap();
23 Ok(Self {
24 version: document.version.parse().unwrap(),
25 text: out,
26 document,
27 filename: filename.to_string(),
28 font_id: (0, 0),
29 resources_id: (0, 0),
30 pages_id: (0, 0),
31 content_id: (0, 0),
32 })
33 }
34 pub fn version(&self) -> f32 {
35 self.version
36 }
37 pub fn text(&self) -> String {
38 self.text.clone()
39 }
40 pub fn get_text_list(&self) -> Vec<&str> {
41 let lines = self.text.lines();
42 let mut list = vec![];
43 for line in lines {
44 if line.trim().is_empty() {
45 continue;
46 }
47 list.push(line.trim());
48 }
49 list
50 }
51
52 pub fn write(filename: &str) -> Result<Self, String> {
53 let mut doc = Document::with_version("1.7");
54 let pages_id = doc.new_object_id();
55
56 let font_id = doc.add_object(dictionary! {
57 "Type" => "Font",
58 "Subtype" => "Type1",
59 "BaseFont" => "Courier",
60 });
61 let resources_id = doc.add_object(dictionary! {
62 "Font" => dictionary! {
63 "F1" => font_id,
64 },
65 });
66 Ok(Self {
67 version: doc.version.parse().unwrap(),
68 text: "".to_string(),
69 document: doc,
70 pages_id,
71 font_id,
72 resources_id,
73 filename: filename.to_string(),
74 content_id: (0, 0),
75 })
76 }
77
78 pub fn page_data(mut self) {
79 let content = Content {
80 operations: vec![
81 Operation::new("BT", vec![]),
82 Operation::new("Tf", vec!["F1".into(), 10.into()]),
83 Operation::new("Td", vec![0.into(), 800.into()]),
84 Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
85 Operation::new("ET", vec![]),
86 ],
87 };
88 let content_id = self
89 .document
90 .add_object(Stream::new(dictionary! {}, content.encode().unwrap()));
91 self.content_id = content_id;
92 let page_id = self.document.add_object(dictionary! {
93 "Type" => "Page",
94 "Parent" => self.pages_id,
95 "Contents" => content_id,
96 });
97 let pages = dictionary! {
98 "Type" => "Pages",
100 "Kids" => vec![page_id.into()],
103 "Count" => 1,
105 "Resources" => self.resources_id,
107 "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
110 };
111 self.document
112 .objects
113 .insert(self.pages_id, Object::Dictionary(pages));
114 let catalog_id = self.document.add_object(dictionary! {
115 "Type" => "Catalog",
116 "Pages" => self.pages_id,
117 });
118 self.document.trailer.set("Root", catalog_id);
119 self.document.compress();
120 self.document.save(self.filename).unwrap();
121 }
122}