1use std::io::Cursor;
2use pdf_extract::content::{Content, Operation};
3use pdf_extract::{dictionary, Document, Object, ObjectId, Stream};
4
5pub struct Pdf {
6 version: f32,
7 text: String,
8 document: Document,
9 filename: String,
10 pub font_id: ObjectId,
11 pub resources_id: ObjectId,
12 pub pages_id: ObjectId,
13 pub content_id: ObjectId,
14}
15
16impl Pdf {
17 pub fn read_from_file(filename: &str) -> Result<Self, String> {
18 let bytes = match std::fs::read(filename) {
19 Ok(e) => e,
20 Err(e) => return Err(format!("Failed to open {}", e)),
21 };
22
23 Self::inner_from(bytes, Some(filename))
24 }
25
26 pub fn read_from_bytes(bytes: Vec<u8>)->Result<Self, String> {
27 Self::inner_from(bytes, None)
28 }
29
30 fn inner_from(bytes: Vec<u8>, filename: Option<&str>)->Result<Self, String> {
31 let out = pdf_extract::extract_text_from_mem(&bytes).unwrap();
32 let reader = Cursor::new(bytes);
33 let document = Document::load_from(reader).unwrap();
34 let filename = filename.unwrap_or("memory").to_string();
35
36 Ok(Self {
37 version: document.version.parse().unwrap(),
38 text: out,
39 document,
40 filename,
41 font_id: (0, 0),
42 resources_id: (0, 0),
43 pages_id: (0, 0),
44 content_id: (0, 0),
45 })
46 }
47
48 pub fn version(&self) -> f32 {
49 self.version
50 }
51 pub fn text(&self) -> String {
52 self.text.clone()
53 }
54 pub fn get_text_list(&self) -> Vec<&str> {
55 let lines = self.text.lines();
56 let mut list = vec![];
57 for line in lines {
58 if line.trim().is_empty() {
59 continue;
60 }
61 list.push(line.trim());
62 }
63 list
64 }
65
66 pub fn write(filename: &str) -> Result<Self, String> {
67 let mut doc = Document::with_version("1.7");
68 let pages_id = doc.new_object_id();
69
70 let font_id = doc.add_object(dictionary! {
71 "Type" => "Font",
72 "Subtype" => "Type1",
73 "BaseFont" => "Courier",
74 });
75 let resources_id = doc.add_object(dictionary! {
76 "Font" => dictionary! {
77 "F1" => font_id,
78 },
79 });
80 Ok(Self {
81 version: doc.version.parse().unwrap(),
82 text: "".to_string(),
83 document: doc,
84 pages_id,
85 font_id,
86 resources_id,
87 filename: filename.to_string(),
88 content_id: (0, 0),
89 })
90 }
91
92 pub fn page_data(mut self) {
93 let content = Content {
94 operations: vec![
95 Operation::new("BT", vec![]),
96 Operation::new("Tf", vec!["F1".into(), 10.into()]),
97 Operation::new("Td", vec![0.into(), 800.into()]),
98 Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
99 Operation::new("ET", vec![]),
100 ],
101 };
102 let content_id = self
103 .document
104 .add_object(Stream::new(dictionary! {}, content.encode().unwrap()));
105 self.content_id = content_id;
106 let page_id = self.document.add_object(dictionary! {
107 "Type" => "Page",
108 "Parent" => self.pages_id,
109 "Contents" => content_id,
110 });
111 let pages = dictionary! {
112 "Type" => "Pages",
114 "Kids" => vec![page_id.into()],
117 "Count" => 1,
119 "Resources" => self.resources_id,
121 "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
124 };
125 self.document
126 .objects
127 .insert(self.pages_id, Object::Dictionary(pages));
128 let catalog_id = self.document.add_object(dictionary! {
129 "Type" => "Catalog",
130 "Pages" => self.pages_id,
131 });
132 self.document.trailer.set("Root", catalog_id);
133 self.document.compress();
134 self.document.save(self.filename).unwrap();
135 }
136}