br-pdf 0.1.1

This is a PDF invoice processing library.
Documentation
use std::io::Cursor;
use pdf_extract::content::{Content, Operation};
use pdf_extract::{dictionary, Document, Object, ObjectId, Stream};

/// A PDF document handle that pairs a parsed `Document` with its extracted text.
///
/// Instances are created either by reading an existing PDF
/// (`read_from_file` / `read_from_bytes`) or by starting a new one (`write`).
pub struct Pdf {
    /// PDF version number, parsed from the document's version string.
    version: f32,
    /// Text extracted from the PDF when it was read; empty for a document
    /// created with `write`.
    text: String,
    /// The underlying document object model.
    document: Document,
    /// Path the document was read from or will be saved to; `"memory"` when
    /// the document was built from an in-memory byte buffer.
    filename: String,
    /// Object ID of the font dictionary; `(0, 0)` when the document was read
    /// rather than created with `write`.
    pub font_id: ObjectId,
    /// Object ID of the resources dictionary; `(0, 0)` when the document was
    /// read rather than created with `write`.
    pub resources_id: ObjectId,
    /// Object ID reserved for the page-tree ("Pages") dictionary; `(0, 0)`
    /// when the document was read rather than created with `write`.
    pub pages_id: ObjectId,
    /// Object ID of the page content stream; `(0, 0)` until `page_data`
    /// assigns it.
    pub content_id: ObjectId,
}

impl Pdf {
    /// Reads the PDF stored at `filename`, extracting its text and parsing
    /// its object structure.
    ///
    /// # Errors
    ///
    /// Returns a message if the file cannot be read, or if its contents
    /// cannot be processed as a PDF (text extraction, document parsing or
    /// version parsing failed).
    pub fn read_from_file(filename: &str) -> Result<Self, String> {
        let bytes = std::fs::read(filename)
            .map_err(|e| format!("Failed to open {}: {}", filename, e))?;

        Self::inner_from(bytes, Some(filename))
    }

    /// Builds a [`Pdf`] from an in-memory PDF byte buffer.
    ///
    /// The resulting instance uses `"memory"` as its filename.
    ///
    /// # Errors
    ///
    /// Returns a message if text extraction, document parsing or version
    /// parsing fails.
    pub fn read_from_bytes(bytes: Vec<u8>) -> Result<Self, String> {
        Self::inner_from(bytes, None)
    }

    /// Shared constructor behind [`Pdf::read_from_file`] and
    /// [`Pdf::read_from_bytes`].
    ///
    /// Every failure is reported through the returned `Result` rather than
    /// by panicking, so callers can handle malformed input.
    fn inner_from(bytes: Vec<u8>, filename: Option<&str>) -> Result<Self, String> {
        // Text extraction only borrows the buffer, so it runs before the
        // buffer is moved into the cursor used for structural parsing.
        let text = pdf_extract::extract_text_from_mem(&bytes)
            .map_err(|e| format!("Failed to extract text: {}", e))?;
        let document = Document::load_from(Cursor::new(bytes))
            .map_err(|e| format!("Failed to load document: {}", e))?;
        let version = document
            .version
            .parse::<f32>()
            .map_err(|e| format!("Invalid PDF version {:?}: {}", document.version, e))?;
        let filename = filename.unwrap_or("memory").to_string();

        Ok(Self {
            version,
            text,
            document,
            filename,
            // Object IDs are only meaningful for documents created via `write`.
            font_id: (0, 0),
            resources_id: (0, 0),
            pages_id: (0, 0),
            content_id: (0, 0),
        })
    }

    /// Returns the PDF version number.
    pub fn version(&self) -> f32 {
        self.version
    }

    /// Returns a copy of the text extracted from the document.
    pub fn text(&self) -> String {
        self.text.clone()
    }

    /// Returns the extracted text as a list of trimmed, non-empty lines.
    ///
    /// Lines that are empty or contain only whitespace are skipped; the
    /// returned slices borrow from this instance.
    pub fn get_text_list(&self) -> Vec<&str> {
        self.text
            .lines()
            .map(str::trim)
            .filter(|line| !line.is_empty())
            .collect()
    }

    /// Starts a new, empty PDF 1.7 document that will be saved to `filename`.
    ///
    /// A Type1 Courier font (registered as `F1`) and a resources dictionary
    /// referencing it are added to the document, and an object ID is
    /// reserved for the page tree. Nothing is written to disk until
    /// [`Pdf::page_data`] is called.
    ///
    /// # Errors
    ///
    /// Returns a message if the document's version string cannot be parsed
    /// as a number.
    pub fn write(filename: &str) -> Result<Self, String> {
        let mut doc = Document::with_version("1.7");
        // Reserved now so pages can reference their parent before the page
        // tree dictionary itself is inserted.
        let pages_id = doc.new_object_id();

        let font_id = doc.add_object(dictionary! {
            "Type" => "Font",
            "Subtype" => "Type1",
            "BaseFont" => "Courier",
        });
        let resources_id = doc.add_object(dictionary! {
            "Font" => dictionary! {
                "F1" => font_id,
            },
        });
        let version = doc
            .version
            .parse::<f32>()
            .map_err(|e| format!("Invalid PDF version {:?}: {}", doc.version, e))?;

        Ok(Self {
            version,
            text: String::new(),
            document: doc,
            pages_id,
            font_id,
            resources_id,
            filename: filename.to_string(),
            content_id: (0, 0),
        })
    }

    /// Adds a single page containing the text "Hello World!" and saves the
    /// document to the stored filename, consuming `self`.
    ///
    /// The page tree is inserted at `pages_id` and references
    /// `resources_id`; both are only assigned real object IDs by
    /// [`Pdf::write`].
    // NOTE(review): on an instance created by `read_from_*` these IDs are the
    // `(0, 0)` placeholder — presumably this method is meant to be used only
    // after `write`; confirm against callers.
    ///
    /// # Panics
    ///
    /// Panics if the content stream cannot be encoded or the file cannot be
    /// saved.
    pub fn page_data(mut self) {
        let content = Content {
            operations: vec![
                // Begin text object.
                Operation::new("BT", vec![]),
                // Select font F1 at size 10.
                Operation::new("Tf", vec!["F1".into(), 10.into()]),
                // Move the text position to (0, 800).
                Operation::new("Td", vec![0.into(), 800.into()]),
                // Show the text.
                Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
                // End text object.
                Operation::new("ET", vec![]),
            ],
        };
        let encoded = content
            .encode()
            .expect("failed to encode page content stream");
        let content_id = self
            .document
            .add_object(Stream::new(dictionary! {}, encoded));
        self.content_id = content_id;
        let page_id = self.document.add_object(dictionary! {
            "Type" => "Page",
            "Parent" => self.pages_id,
            "Contents" => content_id,
        });
        let pages = dictionary! {
            // Type of dictionary
            "Type" => "Pages",
            // Vector of page IDs in document. Normally would contain more than one ID
            // and be produced using a loop of some kind.
            "Kids" => vec![page_id.into()],
            // Page count
            "Count" => 1,
            // ID of resources dictionary, defined earlier
            "Resources" => self.resources_id,
            // A rectangle that defines the boundaries of the physical or digital media.
            // This is the "page size" (595 x 842, which matches A4 in PDF points).
            "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
        };
        // Fill in the page tree at the ID that was reserved up front.
        self.document
            .objects
            .insert(self.pages_id, Object::Dictionary(pages));
        let catalog_id = self.document.add_object(dictionary! {
            "Type" => "Catalog",
            "Pages" => self.pages_id,
        });
        self.document.trailer.set("Root", catalog_id);
        self.document.compress();
        self.document
            .save(self.filename)
            .expect("failed to save PDF document");
    }
}