#![allow(dead_code)]
use std::io::{Read, Write};
use lopdf::{
dictionary, Document, Object,
Object::{Array, Integer, Name, Reference},
ObjectId, Stream,
};
use thiserror::Error;
/// Key on a file specification dictionary; its name value is compared against
/// `C2PA_RELATIONSHIP` to recognise the C2PA manifest.
static AF_RELATIONSHIP_KEY: &[u8] = b"AFRelationship";
/// Key on a page dictionary holding that page's annotation array.
static ANNOTATIONS_KEY: &[u8] = b"Annots";
/// Key on the document catalog holding the associated-files array.
static ASSOCIATED_FILE_KEY: &[u8] = b"AF";
/// Name value stored under `AF_RELATIONSHIP_KEY` for the C2PA manifest file specification.
static C2PA_RELATIONSHIP: &[u8] = b"C2PA_Manifest";
/// Label used for the manifest's name-tree entry, file specification strings
/// and annotation strings.
static CONTENT_CREDS: &str = "Content Credentials";
/// Key inside the catalog's `Names` dictionary that leads to the embedded files entry.
static EMBEDDED_FILES_KEY: &[u8] = b"EmbeddedFiles";
/// `Subtype` dictionary key (used on annotations, streams and the metadata stream).
static SUBTYPE_KEY: &[u8] = b"Subtype";
/// `Type` dictionary key.
static TYPE_KEY: &[u8] = b"Type";
/// `Names` key; used both on the catalog and inside the embedded files dictionary.
static NAMES_KEY: &[u8] = b"Names";
/// Errors produced while reading or modifying a PDF for C2PA purposes.
#[derive(Debug, Error)]
pub enum Error {
/// Any error reported by `lopdf`; created automatically by `?` through `#[from]`.
#[error(transparent)]
UnableToReadPdf(#[from] lopdf::Error),
/// No C2PA manifest could be located in the document.
#[error("No manifest is present in the PDF.")]
NoManifest,
/// Returned when the document has no first page to attach the annotation to.
#[error("Unable to add C2PA manifest as an annotation to the PDF.")]
AddingAnnotation,
/// The embedded files name array has no usable "Content Credentials" entry.
#[error("Unable to find C2PA manifest in the PDF's embedded files.")]
UnableToFindEmbeddedFileManifest,
/// No associated-files entry points at a C2PA file specification.
#[error("Unable to find a C2PA embedded file specification in PDF's associated files array")]
FindingC2PAFileSpec,
}
/// Value written as the `Subtype` of the embedded manifest stream dictionary.
const C2PA_MIME_TYPE: &str = "application/x-c2pa-manifest-store";
/// Operations needed to read, write and remove a C2PA manifest in a PDF.
///
/// A mock implementation is generated by `mockall::automock` in test builds.
#[cfg_attr(test, mockall::automock)]
pub(crate) trait C2paPdf: Sized {
/// Writes the document to `writer`.
// NOTE(review): the `'static` bound is presumably required by the generated mock — confirm.
fn save_to<W: Write + 'static>(&mut self, writer: &mut W) -> Result<(), std::io::Error>;
/// Reports whether the document is encrypted.
fn is_password_protected(&self) -> bool;
/// Reports whether a C2PA manifest file specification can be found.
fn has_c2pa_manifest(&self) -> bool;
/// Stores `bytes` as the manifest and registers it among the document's embedded files.
fn write_manifest_as_embedded_file(&mut self, bytes: Vec<u8>) -> Result<(), Error>;
/// Stores `vec` as the manifest and attaches it through a file attachment annotation.
fn write_manifest_as_annotation(&mut self, vec: Vec<u8>) -> Result<(), Error>;
/// Returns the manifest bytes borrowed from the document, or `Ok(None)` when
/// no manifest is present.
#[allow(clippy::needless_lifetimes)] fn read_manifest_bytes<'a>(&'a self) -> Result<Option<Vec<&'a [u8]>>, Error>;
/// Removes the manifest and the entries that point at it.
fn remove_manifest_bytes(&mut self) -> Result<(), Error>;
/// Returns the document's XMP metadata as a string, if available.
fn read_xmp(&self) -> Option<String>;
}
/// `lopdf`-backed implementation of [`C2paPdf`].
pub(crate) struct Pdf {
// Parsed document that every operation reads from or mutates.
document: Document,
}
impl C2paPdf for Pdf {
/// Writes the document to `writer` by delegating to `Document::save_to`.
fn save_to<W: Write>(&mut self, writer: &mut W) -> Result<(), std::io::Error> {
self.document.save_to(writer)
}
/// Returns `true` when `lopdf` reports the document as encrypted.
fn is_password_protected(&self) -> bool {
self.document.is_encrypted()
}
/// Returns `true` when the associated-files array references a C2PA file specification.
fn has_c2pa_manifest(&self) -> bool {
self.c2pa_file_spec_object_id().is_some()
}
fn write_manifest_as_embedded_file(&mut self, bytes: Vec<u8>) -> Result<(), Error> {
let file_stream_ref = self.add_c2pa_embedded_file_stream(bytes);
let file_spec_ref = self.add_embedded_file_specification(file_stream_ref);
self.push_associated_file(file_spec_ref)?;
let mut manifest_name_file_pair = vec![
Object::string_literal(CONTENT_CREDS),
Reference(file_spec_ref),
];
let Ok(catalog_names) = self.document.catalog_mut()?.get_mut(NAMES_KEY) else {
let embedded_files_ref = self.document.add_object(dictionary! {
NAMES_KEY => manifest_name_file_pair
});
let names_ref = self.document.add_object(dictionary! {
EMBEDDED_FILES_KEY => Reference(embedded_files_ref)
});
self.document.catalog_mut()?.set(NAMES_KEY, names_ref);
return Ok(());
};
let names_dictionary = match catalog_names.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?,
_ => catalog_names.as_dict_mut()?,
};
let Ok(embedded_files) = names_dictionary.get_mut(EMBEDDED_FILES_KEY) else {
names_dictionary.set(
EMBEDDED_FILES_KEY,
dictionary! { NAMES_KEY => manifest_name_file_pair },
);
return Ok(());
};
let embedded_files_dictionary = match embedded_files.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?,
_ => embedded_files.as_dict_mut()?,
};
let Ok(names) = embedded_files_dictionary.get_mut(NAMES_KEY) else {
embedded_files_dictionary.set(
NAMES_KEY,
dictionary! { NAMES_KEY => manifest_name_file_pair },
);
return Ok(());
};
let names_array = match names.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?.as_array_mut()?,
_ => names.as_array_mut()?,
};
names_array.append(&mut manifest_name_file_pair);
Ok(())
}
/// Embeds `bytes` as the C2PA manifest and exposes it through a file
/// attachment annotation on the first page.
///
/// The manifest stream and file specification are created, the file
/// specification is appended to the catalog's associated files, and finally
/// the annotation is added. Errors from the last two steps are propagated.
fn write_manifest_as_annotation(&mut self, bytes: Vec<u8>) -> Result<(), Error> {
    let stream_id = self.add_c2pa_embedded_file_stream(bytes);
    let spec_id = self.add_embedded_file_specification(stream_id);
    self.push_associated_file(spec_id)?;
    self.add_file_attachment_annotation(spec_id)
}
fn read_manifest_bytes(&self) -> Result<Option<Vec<&[u8]>>, Error> {
let Some(id) = self.c2pa_file_spec_object_id() else {
return Ok(None);
};
let ef = &self
.document
.get_object(id)
.and_then(Object::as_dict)?
.get_deref(b"EF", &self.document)?
.as_dict()?;
Ok(Some(vec![
&ef.get_deref(b"F", &self.document)? .as_stream()?
.content,
]))
}
/// Removes the C2PA manifest from the document.
///
/// The entry that exposes the manifest is removed first: the embedded files
/// name array is tried, and if that fails the page annotations are tried.
/// The file specification is then dropped from the associated files array,
/// and the stream and file specification objects are deleted.
///
/// # Errors
/// * [`Error::NoManifest`] when no C2PA file specification is found.
/// * [`Error::UnableToReadPdf`] when the file specification, its `EF`
///   dictionary or its `F` reference cannot be read.
/// * Any error from removing the annotation entry or the associated files
///   reference.
fn remove_manifest_bytes(&mut self) -> Result<(), Error> {
    // A single lookup both detects the manifest and yields its object id.
    let file_spec_ref = self.c2pa_file_spec_object_id().ok_or(Error::NoManifest)?;

    // `EF` is resolved with `get_deref` so that an indirect `EF` dictionary
    // is handled the same way as when the manifest is read.
    let file_stream_ref = self
        .document
        .get_object(file_spec_ref)?
        .as_dict()?
        .get_deref(b"EF", &self.document)?
        .as_dict()?
        .get(b"F")?
        .as_reference()?;

    self.remove_manifest_from_embedded_files()
        .or_else(|_| self.remove_manifest_from_annotations())?;
    self.remove_c2pa_file_spec_reference()?;

    self.document.delete_object(file_stream_ref);
    self.document.delete_object(file_spec_ref);
    Ok(())
}
/// Returns the catalog's `Metadata` stream content as a UTF-8 string.
///
/// `None` is returned when the catalog or the metadata stream is missing,
/// when the stream's `Subtype` name is absent or does not equal `xml`
/// (compared after lowercasing), or when the content is not valid UTF-8.
fn read_xmp(&self) -> Option<String> {
    let metadata = self
        .document
        .catalog()
        .and_then(|catalog| catalog.get_deref(b"Metadata", &self.document))
        .and_then(Object::as_stream)
        .ok()?;

    let subtype_name = metadata
        .dict
        .get_deref(SUBTYPE_KEY, &self.document)
        .and_then(Object::as_name)
        .ok()?;
    let subtype = str::from_utf8(subtype_name).ok()?;

    if subtype.to_lowercase() == "xml" {
        String::from_utf8(metadata.content.clone()).ok()
    } else {
        None
    }
}
}
impl Pdf {
/// Parses a PDF from an in-memory byte slice.
///
/// # Errors
/// Returns [`Error::UnableToReadPdf`] when `lopdf` cannot load the bytes.
#[allow(dead_code)]
pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
let document = Document::load_mem(bytes)?;
Ok(Self { document })
}
/// Parses a PDF from any `Read` source.
///
/// # Errors
/// Returns [`Error::UnableToReadPdf`] when `lopdf` cannot load the source.
pub fn from_reader<R: Read>(source: R) -> Result<Self, Error> {
let document = Document::load_from(source)?;
Ok(Self { document })
}
/// Returns the catalog's associated-files (`AF`) array, following an
/// indirect reference if the array is stored as a separate object.
///
/// # Errors
/// Returns [`Error::UnableToReadPdf`] when the catalog or the `AF` entry is
/// missing, or when the entry is not an array.
fn associated_files(&self) -> Result<&Vec<Object>, Error> {
    let catalog = self.document.catalog()?;
    let entries = catalog
        .get_deref(ASSOCIATED_FILE_KEY, &self.document)?
        .as_array()?;
    Ok(entries)
}
/// Finds the object id of the C2PA manifest file specification.
///
/// Walks the associated-files array in order and returns the first
/// referenced dictionary whose `AFRelationship` name equals `C2PA_Manifest`.
/// Entries that are not references, or that cannot be resolved to such a
/// dictionary, are skipped. Returns `None` when the array is unavailable or
/// nothing matches.
fn c2pa_file_spec_object_id(&self) -> Option<ObjectId> {
    let entries = self.associated_files().ok()?;
    for entry in entries {
        let Ok(candidate) = entry.as_reference() else {
            continue;
        };
        let relationship = self
            .document
            .get_object(candidate)
            .and_then(Object::as_dict)
            .and_then(|spec| spec.get_deref(AF_RELATIONSHIP_KEY, &self.document))
            .and_then(Object::as_name);
        if matches!(relationship, Ok(name) if name == C2PA_RELATIONSHIP) {
            return Some(candidate);
        }
    }
    None
}
/// Removes the reference to the C2PA file specification from the catalog's
/// associated-files (`AF`) array.
///
/// The array may be stored inline in the catalog or behind an indirect
/// reference; both forms are handled, matching how the array is located when
/// it is read and appended to. Entries that are not references are kept.
///
/// # Errors
/// * [`Error::FindingC2PAFileSpec`] when no C2PA file specification is found.
/// * [`Error::UnableToReadPdf`] when the catalog or the `AF` array cannot be
///   accessed.
fn remove_c2pa_file_spec_reference(&mut self) -> Result<(), Error> {
    let c2pa_file_spec_reference = self
        .c2pa_file_spec_object_id()
        .ok_or(Error::FindingC2PAFileSpec)?;

    let associated_files = self
        .document
        .catalog_mut()?
        .get_mut(ASSOCIATED_FILE_KEY)?;
    // Follow an indirect `AF` entry to the array object it points at.
    let associated_files = match associated_files.as_reference() {
        Ok(object_id) => self.document.get_object_mut(object_id)?,
        _ => associated_files,
    }
    .as_array_mut()?;

    associated_files.retain(|entry| {
        !matches!(
            entry.as_reference(),
            Ok(reference) if reference == c2pa_file_spec_reference
        )
    });
    Ok(())
}
/// Adds a `FileAttachment` annotation pointing at `file_spec_reference` to
/// the first page of the document.
///
/// The page's `Annots` array is created when missing and may otherwise be
/// stored inline or behind an indirect reference.
///
/// # Errors
/// * [`Error::AddingAnnotation`] when the document has no pages. The page is
///   looked up before the annotation object is created, so this failure
///   leaves no orphaned annotation object behind.
/// * [`Error::UnableToReadPdf`] when the page or its `Annots` entry has an
///   unexpected type.
fn add_file_attachment_annotation(
    &mut self,
    file_spec_reference: ObjectId,
) -> Result<(), Error> {
    let first_page_ref = self
        .document
        .page_iter()
        .next()
        .ok_or(Error::AddingAnnotation)?;

    let annotation = dictionary! {
        "Type" => Name("Annot".into()),
        "Contents" => Object::string_literal(CONTENT_CREDS),
        "Name" => Object::string_literal(CONTENT_CREDS),
        SUBTYPE_KEY => Name("FileAttachment".into()),
        "FS" => Reference(file_spec_reference),
        "Rect" => vec![0.into(), 0.into(), 10.into(), 10.into()],
    };
    let annotation_ref = self.document.add_object(annotation);

    let first_page = self
        .document
        .get_object_mut(first_page_ref)?
        .as_dict_mut()?;
    if !first_page.has(ANNOTATIONS_KEY) {
        first_page.set(ANNOTATIONS_KEY, Array(vec![]))
    }

    let annotation_object = first_page.get_mut(ANNOTATIONS_KEY)?;
    // Follow an indirect `Annots` entry to the array object it points at.
    let annotations = if let Ok(v) = annotation_object.as_reference() {
        self.document.get_object_mut(v)?
    } else {
        annotation_object
    }
    .as_array_mut()?;

    annotations.push(Reference(annotation_ref));
    Ok(())
}
fn push_associated_file(&mut self, embedded_file_spec_ref: ObjectId) -> Result<(), Error> {
let catalog = self.document.catalog_mut()?;
if catalog.get_mut(ASSOCIATED_FILE_KEY).is_err() {
catalog.set(ASSOCIATED_FILE_KEY, vec![]);
}
let associated_files = catalog.get_mut(ASSOCIATED_FILE_KEY)?;
let associated_files = match associated_files.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?,
_ => associated_files,
}
.as_array_mut()?;
associated_files.push(Reference(embedded_file_spec_ref));
Ok(())
}
/// Adds a file specification dictionary for the manifest stream and returns
/// its object id.
///
/// The dictionary carries `AFRelationship = C2PA_Manifest`, the
/// "Content Credentials" label under `Desc`, `F` and `UF`, and an `EF`
/// dictionary whose `F` entry references `file_stream_ref`.
fn add_embedded_file_specification(&mut self, file_stream_ref: ObjectId) -> ObjectId {
    let file_specification = dictionary! {
        AF_RELATIONSHIP_KEY => Name(C2PA_RELATIONSHIP.into()),
        "Desc" => Object::string_literal(CONTENT_CREDS),
        "F" => Object::string_literal(CONTENT_CREDS),
        "EF" => dictionary! {
            "F" => Reference(file_stream_ref),
        },
        // PDF names are case-sensitive and ISO 32000 spells the type of a
        // file specification dictionary `Filespec`.
        TYPE_KEY => Name("Filespec".into()),
        "UF" => Object::string_literal(CONTENT_CREDS),
    };
    self.document.add_object(file_specification)
}
/// Adds a stream object holding the manifest `bytes` and returns its id.
///
/// The stream dictionary is built with a nested `F` dictionary containing the
/// C2PA MIME type as `Subtype` and the byte count as `Length`.
// NOTE(review): `Subtype`/`Length` are nested under `F` rather than placed
// directly on the stream dictionary — confirm this layout is intended.
fn add_c2pa_embedded_file_stream(&mut self, bytes: Vec<u8>) -> ObjectId {
    let byte_count = bytes.len() as i64;
    let stream_dictionary = dictionary! {
        "F" => dictionary! {
            SUBTYPE_KEY => C2PA_MIME_TYPE,
            "Length" => Integer(byte_count),
        },
    };
    let manifest_stream = Stream::new(stream_dictionary, bytes);
    self.document.add_object(manifest_stream)
}
fn remove_manifest_from_annotations(&mut self) -> Result<(), Error> {
for (_, page_id) in self.document.get_pages() {
self.document
.get_object_mut(page_id)?
.as_dict_mut()?
.get_mut(ANNOTATIONS_KEY)?
.as_array_mut()?
.retain(|obj| {
obj.as_dict()
.and_then(|annot| annot.get(TYPE_KEY))
.and_then(Object::as_name)
.map(|str| str::from_utf8(str) != Ok(CONTENT_CREDS))
.unwrap_or(true)
});
}
Ok(())
}
fn remove_manifest_from_embedded_files(&mut self) -> Result<(), Error> {
let Ok(names) = self.document.catalog_mut()?.get_mut(NAMES_KEY) else {
return Err(Error::NoManifest);
};
let names_dictionary = match names.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?,
_ => names.as_dict_mut()?,
};
let embedded_files_object = names_dictionary.get_mut(EMBEDDED_FILES_KEY)?;
let embedded_files_dictionary = match embedded_files_object.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?,
_ => embedded_files_object.as_dict_mut()?,
};
let names_vector_object = embedded_files_dictionary.get_mut(NAMES_KEY)?;
let names_vector = match names_vector_object.as_reference() {
Ok(object_id) => self.document.get_object_mut(object_id)?.as_array_mut()?,
_ => names_vector_object.as_array_mut()?,
};
let content_creds_marker_idx = names_vector
.iter()
.position(|value| {
value
.as_str()
.map(|value| str::from_utf8(value) == Ok(CONTENT_CREDS))
.unwrap_or_default()
})
.ok_or(Error::UnableToFindEmbeddedFileManifest)?;
let content_creds_reference_idx = content_creds_marker_idx + 1;
if content_creds_reference_idx >= names_vector.len() {
return Err(Error::UnableToFindEmbeddedFileManifest);
}
names_vector.drain(content_creds_marker_idx..=content_creds_reference_idx);
Ok(())
}
}
// Unit tests for `Pdf`. Every test is also registered as a `wasm_bindgen_test`
// when built for wasm32 targets other than WASI.
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]
use super::*;
#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
use wasm_bindgen_test::*;
// A valid PDF fixture loads successfully.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_loads_pdf_from_bytes() {
let bytes = include_bytes!("../../tests/fixtures/basic.pdf");
let pdf_result = Pdf::from_bytes(bytes);
assert!(pdf_result.is_ok());
}
// A non-PDF file (JPEG) is rejected with `UnableToReadPdf`.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_loads_pdf_from_bytes_with_invalid_file() {
let bytes = include_bytes!("../../tests/fixtures/XCA.jpg");
let pdf_result = Pdf::from_bytes(bytes);
assert!(matches!(pdf_result, Err(Error::UnableToReadPdf(_))));
}
// Password protection is detected on the protected fixture only.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_is_password_protected() {
let bytes = include_bytes!("../../tests/fixtures/basic-password.pdf");
let pdf_result = Pdf::from_bytes(bytes).unwrap();
assert!(pdf_result.is_password_protected());
let bytes = include_bytes!("../../tests/fixtures/basic.pdf");
let pdf = Pdf::from_bytes(bytes).unwrap();
assert!(!pdf.is_password_protected());
}
// A plain PDF reports no manifest.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_has_c2pa_manifest_on_file_without_manifest() {
let bytes = include_bytes!("../../tests/fixtures/basic.pdf");
let pdf = Pdf::from_bytes(bytes).unwrap();
assert!(!pdf.has_c2pa_manifest())
}
// After writing an annotation manifest the PDF reports a manifest.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_has_c2pa_manifest_on_file_with_manifest() {
let bytes = include_bytes!("../../tests/fixtures/basic.pdf");
let mut pdf = Pdf::from_bytes(bytes).unwrap();
assert!(!pdf.has_c2pa_manifest());
pdf.write_manifest_as_annotation(vec![0u8, 1u8]).unwrap();
assert!(pdf.has_c2pa_manifest());
}
// Adding the manifest stream creates exactly one object holding the bytes.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_adds_embedded_file_spec_to_pdf_stream() {
let bytes = include_bytes!("../../tests/fixtures/express.pdf");
let mut pdf = Pdf::from_bytes(bytes).unwrap();
let object_count_before_add = pdf.document.objects.len();
let bytes = vec![10u8];
let id = pdf.add_c2pa_embedded_file_stream(bytes.clone());
assert_eq!(object_count_before_add + 1, pdf.document.objects.len());
let stream = pdf.document.get_object(id);
assert_eq!(stream.unwrap().as_stream().unwrap().content, bytes);
}
// Writing a manifest as an annotation makes it discoverable.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_write_manifest_as_annotation() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/express.pdf")).unwrap();
assert!(!pdf.has_c2pa_manifest());
pdf.write_manifest_as_annotation(vec![10u8, 20u8]).unwrap();
assert!(pdf.has_c2pa_manifest());
}
// Same as above, using the `basic-annotation.pdf` fixture.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_write_manifest_bytes_to_pdf_with_existing_annotations() {
let mut pdf =
Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic-annotation.pdf")).unwrap();
pdf.write_manifest_as_annotation(vec![10u8, 20u8]).unwrap();
assert!(pdf.has_c2pa_manifest());
}
// Writing a manifest as an embedded file makes it discoverable.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_add_manifest_to_embedded_files() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
pdf.write_manifest_as_embedded_file(vec![10u8, 20u8])
.unwrap();
assert!(pdf.has_c2pa_manifest());
}
// Same as above, using the `basic-attachments.pdf` fixture.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_add_manifest_to_embedded_files_attachments_present() {
let mut pdf =
Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic-attachments.pdf")).unwrap();
pdf.write_manifest_as_embedded_file(vec![10u8, 20u8])
.unwrap();
assert!(pdf.has_c2pa_manifest());
}
// A manifest survives a save / reload round trip.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_save_to() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
assert!(!pdf.has_c2pa_manifest());
pdf.write_manifest_as_annotation(vec![10u8]).unwrap();
assert!(pdf.has_c2pa_manifest());
let mut saved_bytes = vec![];
pdf.save_to(&mut saved_bytes).unwrap();
let saved_pdf = Pdf::from_bytes(&saved_bytes).unwrap();
assert!(saved_pdf.has_c2pa_manifest());
}
// Bytes written as an embedded file are read back unchanged.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_reads_manifest_bytes_for_embedded_files_manifest() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/express.pdf")).unwrap();
assert!(!pdf.has_c2pa_manifest());
let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8];
pdf.write_manifest_as_embedded_file(manifest_bytes.clone())
.unwrap();
assert!(pdf.has_c2pa_manifest());
assert!(matches!(
pdf.read_manifest_bytes(),
Ok(Some(manifests)) if manifests[0] == manifest_bytes
));
}
// Bytes written as an annotation are read back unchanged.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_reads_manifest_bytes_for_annotation_manifest() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
assert!(!pdf.has_c2pa_manifest());
let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8];
pdf.write_manifest_as_annotation(manifest_bytes.clone())
.unwrap();
assert!(pdf.has_c2pa_manifest());
assert!(matches!(
pdf.read_manifest_bytes(),
Ok(Some(manifests)) if manifests[0] == manifest_bytes
));
}
// Reading from a PDF without a manifest yields `Ok(None)`.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_read_manifest_bytes_from_pdf_without_bytes_returns_none() {
let pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
assert!(!pdf.has_c2pa_manifest());
assert!(matches!(pdf.read_manifest_bytes(), Ok(None)));
}
// An `AF` array whose only entry is the reference (100, 0) yields `Ok(None)`.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_read_manifest_bytes_from_pdf_with_other_af_relationship_returns_none() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
pdf.document
.catalog_mut()
.unwrap()
.set(ASSOCIATED_FILE_KEY, vec![Reference((100, 0))]);
assert!(matches!(pdf.read_manifest_bytes(), Ok(None)));
}
// An `AF` entry set to a bare reference (not an array) yields `Ok(None)`.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_read_pdf_with_associated_file_that_is_not_manifest() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
pdf.document
.catalog_mut()
.unwrap()
.set(ASSOCIATED_FILE_KEY, Reference((100, 0)));
assert!(matches!(pdf.read_manifest_bytes(), Ok(None)));
}
// The `basic-no-xmp.pdf` fixture has no readable XMP.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_read_xmp_on_pdf_with_none() {
let pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic-no-xmp.pdf")).unwrap();
assert_eq!(pdf.read_xmp(), None);
}
// The `basic.pdf` fixture has readable XMP.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_read_xmp_on_pdf_with_some_metadata() {
let pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
assert!(pdf.read_xmp().is_some());
}
// Removing from a PDF without a manifest fails with `NoManifest`.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_remove_manifest_bytes_from_file_without_c2pa_returns_error() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
assert!(matches!(
pdf.remove_manifest_bytes(),
Err(Error::NoManifest)
));
}
// An annotation-based manifest can be removed again.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_remove_manifest_from_file_with_annotation_based_manifest() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8];
pdf.write_manifest_as_annotation(manifest_bytes.clone())
.unwrap();
assert!(pdf.has_c2pa_manifest());
assert!(pdf.remove_manifest_bytes().is_ok());
assert!(!pdf.has_c2pa_manifest());
}
// An embedded-file-based manifest can be removed again.
#[test]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
fn test_remove_manifest_from_file_with_embedded_file_based_manifest() {
let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap();
let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8];
pdf.write_manifest_as_embedded_file(manifest_bytes.clone())
.unwrap();
assert!(pdf.has_c2pa_manifest());
assert!(pdf.remove_manifest_bytes().is_ok());
assert!(!pdf.has_c2pa_manifest());
}
}