c2pa 0.80.1

Rust SDK for C2PA (Coalition for Content Provenance and Authenticity) implementors
Documentation
// Copyright 2022 Adobe. All rights reserved.
// This file is licensed to you under the Apache License,
// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
// or the MIT license (http://opensource.org/licenses/MIT),
// at your option.

// Unless required by applicable law or agreed to in writing,
// this software is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or
// implied. See the LICENSE-MIT and LICENSE-APACHE files for the
// specific language governing permissions and limitations under
// each license.

use std::{io::Cursor, str};

use log::error;
use quick_xml::{
    events::{BytesStart, Event},
    name::QName,
    Reader, Writer,
};

use crate::{asset_io::CAIRead, jumbf_io::get_cailoader_handler, Error, Result};

const RDF_DESCRIPTION: &[u8] = b"rdf:Description";
const XMP_END: &[u8] = b"<?xpacket end=\"w\"?>";

pub const MIN_XMP: &str = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?><x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 6.0.0"><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about=""  xmlns:xmp="http://ns.adobe.com/xap/1.0/" xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmpMM:DocumentID="xmp.did:cb9f5498-bb58-4572-8043-8c369e6bfb9b" xmpMM:InstanceID="xmp.iid:cb9f5498-bb58-4572-8043-8c369e6bfb9b"> </rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>"#;

#[derive(Default)]
pub struct XmpInfo {
    pub document_id: Option<String>,
    pub instance_id: Option<String>,
    pub provenance: Option<String>,
}

impl XmpInfo {
    /// search xmp data for provenance, documentID and instanceID
    pub fn from_source(source: &mut dyn CAIRead, format: &str) -> Self {
        let xmp = get_cailoader_handler(format).and_then(|cai_loader| {
            // read xmp if available
            cai_loader.read_xmp(source)
        });

        // todo: do this in one pass through XMP
        let provenance = xmp.as_deref().and_then(extract_provenance);
        let document_id = xmp.as_deref().and_then(extract_document_id);
        let instance_id = xmp.as_deref().and_then(extract_instance_id);
        Self {
            document_id,
            instance_id,
            provenance,
        }
    }
}

/// Extract an a value from XMP using a key
fn extract_xmp_key(xmp: &str, key: &str) -> Option<String> {
    let mut reader = Reader::from_str(xmp);
    reader.config_mut().trim_text(true);

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
                if e.name() == QName(RDF_DESCRIPTION) {
                    // attribute case
                    let value = e.attributes().find(|a| {
                        if let Ok(attribute) = a {
                            attribute.key == QName(key.as_bytes())
                        } else {
                            false
                        }
                    });
                    if let Some(Ok(attribute)) = value {
                        if let Ok(s) = String::from_utf8(attribute.value.to_vec()) {
                            return Some(s);
                        }
                    }
                } else if e.name() == QName(key.as_bytes()) {
                    // tag case
                    if let Ok(s) = reader.read_text(e.name()) {
                        return Some(s.to_string());
                    }
                }
            }
            Ok(Event::Eof) => break,
            _ => {}
        }
    }
    None
}

// writes the event to the writer)
/// Add a value to XMP using a key, replaces the value if the key exists, or adds it only once
fn add_xmp_key(xmp: &str, key: &str, value: &str) -> Result<String> {
    let orig_length = xmp.len();

    // Minimal padding should be 2 KB to 4 KB. This is used if no XMP packet end is found.
    // If no packet end is found and the original length is less than 4096, the packet will be
    // extended.
    let mut target_length = orig_length.max(4096);
    // Remove the ending xpacket if present for easier manipulation
    let xpacket_end = "<?xpacket end";
    let xpacket_end_length = XMP_END.len();
    let xmp_body = if let Some(pos) = xmp.rfind(xpacket_end) {
        target_length = orig_length - xpacket_end_length;
        xmp[..pos].trim_end()
    } else {
        xmp
    };

    let mut reader = Reader::from_str(xmp_body);
    // Do not trim text to preserve whitespace
    reader.config_mut().trim_text(false);
    reader.config_mut().expand_empty_elements = false;
    let mut writer = Writer::new(Cursor::new(Vec::new()));
    let mut added = false;
    loop {
        let event = reader
            .read_event()
            .map_err(|e| Error::XmpReadError(e.to_string()))?;
        // println!("{:?}", event);
        match event {
            Event::Start(ref e) if e.name() == QName(RDF_DESCRIPTION) && !added => {
                // creates a new element
                let mut elem = BytesStart::from_content(
                    String::from_utf8_lossy(RDF_DESCRIPTION),
                    RDF_DESCRIPTION.len(),
                );

                for attr in e.attributes() {
                    match attr {
                        Ok(attr) => {
                            if attr.key == QName(key.as_bytes()) {
                                // replace the key/value if it exists
                                elem.push_attribute((key, value));
                                added = true;
                            } else {
                                // add all other existing elements
                                elem.extend_attributes([attr]);
                            }
                        }
                        Err(e) => {
                            error!("Error at position {}", reader.buffer_position());
                            return Err(Error::XmpReadError(e.to_string()));
                        }
                    }
                }
                if !added {
                    // didn't exist, so add it
                    elem.push_attribute((key, value));
                    added = true;
                }
                // writes the event to the writer
                writer
                    .write_event(Event::Start(elem))
                    .map_err(|e| Error::XmpWriteError(e.to_string()))?;
            }
            Event::Empty(ref e) if e.name() == QName(RDF_DESCRIPTION) && !added => {
                // creates a new element
                let mut elem = BytesStart::from_content(
                    String::from_utf8_lossy(RDF_DESCRIPTION),
                    RDF_DESCRIPTION.len(),
                );
                for attr in e.attributes() {
                    match attr {
                        Ok(attr) => {
                            if attr.key == QName(key.as_bytes()) {
                                // replace the key/value if it exists
                                elem.push_attribute((key, value));
                                added = true;
                            } else {
                                // add all other existing elements
                                elem.extend_attributes([attr]);
                            }
                        }
                        Err(e) => {
                            error!("Error at position {}", reader.buffer_position());
                            return Err(Error::XmpReadError(e.to_string()));
                        }
                    }
                }
                if !added {
                    // didn't exist, so add it
                    elem.push_attribute((key, value));
                    added = true;
                }
                // writes the event to the writer
                writer
                    .write_event(Event::Empty(elem))
                    .map_err(|e| Error::XmpWriteError(e.to_string()))?;
            }
            Event::Eof => break,
            e => {
                writer
                    .write_event(e)
                    .map_err(|e| Error::XmpWriteError(e.to_string()))?;
            }
        }
    }
    // Maintain XMP packet length with padding if possible.
    let padding_length = target_length.saturating_sub(writer.get_ref().get_ref().len());
    write_xmp_padding(&mut writer.get_mut(), padding_length)?;
    let result = writer.into_inner().into_inner();
    String::from_utf8(result).map_err(|e| Error::XmpWriteError(e.to_string()))
}

/// extract the dc:provenance value from xmp
pub fn extract_provenance(xmp: &str) -> Option<String> {
    extract_xmp_key(xmp, "dcterms:provenance")
}

/// extract the xmpMM:InstanceID value from xmp
fn extract_instance_id(xmp: &str) -> Option<String> {
    extract_xmp_key(xmp, "xmpMM:InstanceID")
}

/// extract the "xmpMM:DocumentID" value from xmp
fn extract_document_id(xmp: &str) -> Option<String> {
    extract_xmp_key(xmp, "xmpMM:DocumentID")
}

/// add or replace a dc:provenance value to xmp, including dc:terms if needed
pub fn add_provenance(xmp: &str, provenance: &str) -> Result<String> {
    let xmp = add_xmp_key(xmp, "xmlns:dcterms", "http://purl.org/dc/terms/")?;
    add_xmp_key(&xmp, "dcterms:provenance", provenance)
}

// According to the XMP Spec, the recommended practice is to use the ASCII space
// character (U+0020) in the appropriate encoding, with a newline about every 100 characters
fn write_xmp_padding<W: std::io::Write>(writer: &mut W, len: usize) -> std::io::Result<()> {
    let mut remaining = len;
    // Add newline before trailer no matter what.
    writer.write_all(b"\n")?;
    remaining = remaining.saturating_sub(1);

    if remaining > 0 {
        // Account for final newline since we are adding whitespace.
        remaining -= 1;
        while remaining > 0 {
            let chunk = remaining.min(99);
            writer.write_all(&vec![b' '; chunk])?;
            remaining -= chunk;
            if remaining > 0 {
                writer.write_all(b"\n")?;
                remaining -= 1;
            }
        }
        writer.write_all(b"\n")?;
    }
    writer.write_all(XMP_END)?;
    Ok(())
}

#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used)]
    #![allow(clippy::unwrap_used)]

    //use env_logger;
    use c2pa_macros::c2pa_test_async;
    #[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
    use wasm_bindgen_test::wasm_bindgen_test;

    use super::*;
    use crate::{
        asset_handlers::jpeg_io::JpegIO,
        asset_io::{AssetIO, RemoteRefEmbedType},
    };

    const XMP_DATA: &str = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
    <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="contentauth">
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about=""
                    xmlns:xmp="http://ns.adobe.com/xap/1.0/"
                    xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/"
                    xmlns:dc="http://purl.org/dc/elements/1.1/"
                    xmlns:dcterms="http://purl.org/dc/terms/"
                xmpMM:DocumentID="xmp.did:cb9f5498-bb58-4572-8043-8c369e6bfb9b"
                xmpMM:InstanceID="xmp.iid:cb9f5498-bb58-4572-8043-8c369e6bfb9b"
                dcterms:provenance="self#jumbf=c2pa/contentauth:urn:uuid:a58065fb-79ae-4eb3-87b9-a19830860059/c2pa.claim"
                dc:format="image/jpeg">
            </rdf:Description>
        </rdf:RDF>
    </x:xmpmeta>"#;

    const PROVENANCE: &str =
        "self#jumbf=c2pa/contentauth:urn:uuid:a58065fb-79ae-4eb3-87b9-a19830860059/c2pa.claim";

    #[test]
    fn read_xmp() {
        let provenance = extract_provenance(XMP_DATA);
        assert_eq!(provenance, Some("self#jumbf=c2pa/contentauth:urn:uuid:a58065fb-79ae-4eb3-87b9-a19830860059/c2pa.claim".to_owned()));
        let document_id = extract_document_id(XMP_DATA);
        assert_eq!(
            document_id,
            Some("xmp.did:cb9f5498-bb58-4572-8043-8c369e6bfb9b".to_owned())
        );
        let instance_id = extract_instance_id(XMP_DATA);
        assert_eq!(
            instance_id,
            Some("xmp.iid:cb9f5498-bb58-4572-8043-8c369e6bfb9b".to_owned())
        );
        let unicorn = extract_xmp_key(XMP_DATA, "unicorn");
        assert_eq!(unicorn, None);
        let bad_xmp = extract_xmp_key("bad xmp", "unicorn");
        assert_eq!(bad_xmp, None);
    }

    #[test]
    fn add_xmp() {
        let xmp = add_provenance(XMP_DATA, PROVENANCE).expect("adding provenance");
        let unicorn = extract_provenance(&xmp);
        println!("{xmp}");
        assert_eq!(unicorn, Some(PROVENANCE.to_string()));

        let xmp = add_provenance(MIN_XMP, PROVENANCE).expect("adding provenance");
        let unicorn = extract_provenance(&xmp);
        println!("{xmp}");
        assert_eq!(unicorn, Some(PROVENANCE.to_string()));
    }

    #[c2pa_test_async]
    async fn test_broken_xmp_read_write_stream() {
        let source_bytes = include_bytes!("../../tests/fixtures/IMG_0003.jpg");
        let test_msg = "https://cai-manifests-stage.adobe.com/manifests/urn-c2pa-0ab6e8b8-5c28-4ef1-8f58-86c21f0349bf-adobe";

        let handler = JpegIO::new("");

        let assetio_handler = handler.get_handler("jpg");

        let remote_ref_handler = assetio_handler.remote_ref_writer_ref().unwrap();

        let mut source_stream = Cursor::new(source_bytes.to_vec());
        let mut output_stream = Cursor::new(Vec::new());

        let original_xmp_length = assetio_handler
            .get_reader()
            .read_xmp(&mut source_stream)
            .unwrap()
            .len();
        source_stream.set_position(0);

        remote_ref_handler
            .embed_reference_to_stream(
                &mut source_stream,
                &mut output_stream,
                RemoteRefEmbedType::Xmp(test_msg.to_string()),
            )
            .unwrap();

        output_stream.set_position(0);

        // read back in XMP
        let read_xmp = assetio_handler
            .get_reader()
            .read_xmp(&mut output_stream)
            .unwrap();

        output_stream.set_position(0);

        //std::fs::write("../target/xmp_write.jpg", output_stream.into_inner()).unwrap();

        assert!(read_xmp.contains(test_msg));
        assert_eq!(read_xmp.len(), original_xmp_length);
    }
}