Skip to main content

pdfium_render/pdf/document/
metadata.rs

1//! Defines the [PdfMetadata] struct, a collection of all the metadata tags in a [PdfDocument].
2
3use crate::bindgen::FPDF_DOCUMENT;
4use crate::pdfium::PdfiumLibraryBindingsAccessor;
5use crate::utils::mem::create_byte_buffer;
6use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
7use std::marker::PhantomData;
8use std::os::raw::c_void;
9use std::slice::Iter;
10
11#[cfg(doc)]
12use crate::pdf::document::PdfDocument;
13
/// Valid metadata tag types in a [PdfDocument].
///
/// The type is `Copy` and implements `Eq` and `Hash`, so it can be used directly as a
/// key in hashed collections such as `HashMap` and `HashSet`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum PdfDocumentMetadataTagType {
    /// The title of the document.
    Title,
    /// The author of the document.
    Author,
    /// The subject of the document.
    Subject,
    /// The keywords associated with the document.
    Keywords,
    /// The creator of the document.
    Creator,
    /// The producer of the document.
    Producer,
    /// The creation date of the document.
    CreationDate,
    /// The modification date of the document.
    ModificationDate,
}

/// A single metadata tag in a [PdfDocument].
///
/// A tag pairs a [PdfDocumentMetadataTagType] with its textual value. Two tags compare
/// equal (and hash identically) when both their type and their value match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PdfDocumentMetadataTag {
    // Which metadata entry this tag represents.
    tag: PdfDocumentMetadataTagType,
    // The text stored for that entry.
    value: String,
}

impl PdfDocumentMetadataTag {
    /// Creates a new tag of the given type that takes ownership of the given value.
    #[inline]
    pub(crate) fn new(tag: PdfDocumentMetadataTagType, value: String) -> Self {
        PdfDocumentMetadataTag { tag, value }
    }

    /// Returns the type of this metadata tag.
    #[inline]
    pub fn tag_type(&self) -> PdfDocumentMetadataTagType {
        self.tag
    }

    /// Returns the value of this metadata tag.
    #[inline]
    pub fn value(&self) -> &str {
        self.value.as_str()
    }
}
52
53/// A collection of all the metadata tags in a [PdfDocument].
54pub struct PdfMetadata<'a> {
55    document_handle: FPDF_DOCUMENT,
56    tags: Vec<PdfDocumentMetadataTag>,
57    lifetime: PhantomData<&'a FPDF_DOCUMENT>,
58}
59
60impl<'a> PdfMetadata<'a> {
61    pub(crate) fn from_pdfium(document_handle: FPDF_DOCUMENT) -> Self {
62        let mut result = PdfMetadata {
63            document_handle,
64            tags: vec![],
65            lifetime: PhantomData,
66        };
67
68        if let Some(tag) = result.get(PdfDocumentMetadataTagType::Title) {
69            result.tags.push(tag);
70        }
71
72        if let Some(tag) = result.get(PdfDocumentMetadataTagType::Author) {
73            result.tags.push(tag);
74        }
75
76        if let Some(tag) = result.get(PdfDocumentMetadataTagType::Subject) {
77            result.tags.push(tag);
78        }
79
80        if let Some(tag) = result.get(PdfDocumentMetadataTagType::Keywords) {
81            result.tags.push(tag);
82        }
83
84        if let Some(tag) = result.get(PdfDocumentMetadataTagType::Creator) {
85            result.tags.push(tag);
86        }
87
88        if let Some(tag) = result.get(PdfDocumentMetadataTagType::Producer) {
89            result.tags.push(tag);
90        }
91
92        if let Some(tag) = result.get(PdfDocumentMetadataTagType::CreationDate) {
93            result.tags.push(tag);
94        }
95
96        if let Some(tag) = result.get(PdfDocumentMetadataTagType::ModificationDate) {
97            result.tags.push(tag);
98        }
99
100        result
101    }
102
103    /// Returns the number of metadata tags in this [PdfMetadata] collection.
104    #[inline]
105    pub fn len(&self) -> usize {
106        self.tags.len()
107    }
108
109    /// Returns true if this [PdfMetadata] collection is empty.
110    #[inline]
111    pub fn is_empty(&self) -> bool {
112        self.len() == 0
113    }
114
115    /// Returns one metadata tag from this [PdfMetadata] collection, if it is defined.
116    pub fn get(&self, tag: PdfDocumentMetadataTagType) -> Option<PdfDocumentMetadataTag> {
117        let result = match tag {
118            PdfDocumentMetadataTagType::Title => self.get_raw_metadata_tag("Title"),
119            PdfDocumentMetadataTagType::Author => self.get_raw_metadata_tag("Author"),
120            PdfDocumentMetadataTagType::Subject => self.get_raw_metadata_tag("Subject"),
121            PdfDocumentMetadataTagType::Keywords => self.get_raw_metadata_tag("Keywords"),
122            PdfDocumentMetadataTagType::Creator => self.get_raw_metadata_tag("Creator"),
123            PdfDocumentMetadataTagType::Producer => self.get_raw_metadata_tag("Producer"),
124            PdfDocumentMetadataTagType::CreationDate => self.get_raw_metadata_tag("CreationDate"),
125            PdfDocumentMetadataTagType::ModificationDate => {
126                self.get_raw_metadata_tag("ModificationDate")
127            }
128        };
129
130        result.map(|value| PdfDocumentMetadataTag::new(tag, value))
131    }
132
133    #[inline]
134    fn get_raw_metadata_tag(&self, tag: &str) -> Option<String> {
135        // Retrieving the tag text from Pdfium is a two-step operation. First, we call
136        // FPDF_GetMetaText() with a null buffer; this will retrieve the length of
137        // the metadata text in bytes. If the length is zero, then there is no such tag.
138
139        // If the length is non-zero, then we reserve a byte buffer of the given
140        // length and call FPDF_GetMetaText() again with a pointer to the buffer;
141        // this will write the metadata text to the buffer in UTF16-LE format.
142
143        let buffer_length = unsafe {
144            self.bindings()
145                .FPDF_GetMetaText(self.document_handle, tag, std::ptr::null_mut(), 0)
146        };
147
148        if buffer_length == 0 {
149            // The tag is not present.
150
151            return None;
152        }
153
154        let mut buffer = create_byte_buffer(buffer_length as usize);
155
156        let result = unsafe {
157            self.bindings().FPDF_GetMetaText(
158                self.document_handle,
159                tag,
160                buffer.as_mut_ptr() as *mut c_void,
161                buffer_length,
162            )
163        };
164
165        assert_eq!(result, buffer_length);
166
167        get_string_from_pdfium_utf16le_bytes(buffer)
168    }
169
170    /// Returns an iterator over all the tags in this [PdfMetadata] collection.
171    #[inline]
172    pub fn iter(&self) -> Iter<'_, PdfDocumentMetadataTag> {
173        self.tags.iter()
174    }
175}
176
177impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfMetadata<'a> {}
178
179#[cfg(feature = "thread_safe")]
180unsafe impl<'a> Send for PdfMetadata<'a> {}
181
182#[cfg(feature = "thread_safe")]
183unsafe impl<'a> Sync for PdfMetadata<'a> {}