// c2pa 0.89.0 - Rust SDK for C2PA (Coalition for Content Provenance and Authenticity) implementors
// Copyright 2024 Adobe. All rights reserved.
// This file is licensed to you under the Apache License,
// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
// or the MIT license (http://opensource.org/licenses/MIT),
// at your option.

// Unless required by applicable law or agreed to in writing,
// this software is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or
// implied. See the LICENSE-MIT and LICENSE-APACHE files for the
// specific language governing permissions and limitations under
// each license.

#[cfg(feature = "file_io")]
use std::path::PathBuf;
use std::{
    ffi::OsStr,
    io::{Read, Seek, SeekFrom, Write},
    path::Path,
};

#[allow(unused)] // different code path for WASI
use tempfile::{tempdir, Builder, NamedTempFile, SpooledTempFile, TempDir};

use crate::{asset_io::rename_or_move, Error, Result};

/// Replaces a byte range of the file at `source_path` with `data`.
///
/// The patched content is first written to a temporary file (created with
/// `tempfile_builder("c2pa_temp")`), which is then handed to
/// `rename_or_move` together with `source_path`.
///
/// # Parameters
/// - `source_path`: file to patch.
/// - `start_location`: offset at which the replacement data starts.
/// - `replace_len`: number of bytes of the original file, starting at
///   `start_location`, that are replaced.
/// - `data`: bytes inserted at `start_location`.
///
/// # Errors
/// Propagates any failure from opening the source file, creating the
/// temporary file, `patch_stream`, or `rename_or_move`.
#[allow(dead_code)]
pub(crate) fn patch_data_in_file(
    source_path: &Path,
    start_location: u64,
    replace_len: u64,
    data: &[u8],
) -> Result<()> {
    let mut original = std::fs::File::open(source_path)?;
    let mut patched = tempfile_builder("c2pa_temp")?;

    patch_stream(
        &mut original,
        &mut patched,
        start_location,
        replace_len,
        data,
    )?;

    rename_or_move(patched, source_path)?;
    Ok(())
}

/// Copies `source` to `dest` with `data` spliced in at byte offset `location`.
///
/// `location` is measured from the start of `source`; the stream is rewound
/// before anything is copied, so its position on entry does not matter.
///
/// # Errors
/// Returns any I/O error raised while seeking, reading, or writing.
#[allow(dead_code)]
pub(crate) fn insert_data_at<R: Read + Seek, W: Write>(
    source: &mut R,
    dest: &mut W,
    location: u64,
    data: &[u8],
) -> Result<()> {
    source.rewind()?;

    // Leading part: everything in front of the insertion point.
    let mut head = Read::take(&mut *source, location);
    std::io::copy(&mut head, dest)?;

    // The inserted payload.
    dest.write_all(data)?;

    // Trailing part: re-anchor at the insertion point and drain the source.
    source.seek(SeekFrom::Start(location))?;
    std::io::copy(source, dest)?;

    Ok(())
}

// Replace data at arbitrary location and len in a stream.
// start_location is where the replacement data will start
// replace_len is how many bytes from source to replaced starting a start_location
// data is the data that will be inserted at start_location
#[allow(dead_code)]
pub(crate) fn patch_stream<R: Read + Seek + ?Sized, W: Write + ?Sized>(
    source: &mut R,
    dest: &mut W,
    start_location: u64,
    replace_len: u64,
    data: &[u8],
) -> Result<()> {
    source.rewind()?;
    let source_len = stream_len(source)?;

    if start_location + replace_len > source_len {
        return Err(Error::BadParam("read past end of source stream".into()));
    }

    let mut before_handle = source.take(start_location);

    // copy data before start location
    std::io::copy(&mut before_handle, dest)?;

    // write out new data
    dest.write_all(data)?;

    // write out the rest of the source skipping the bytes we wanted to replace
    let source = before_handle.into_inner();
    source.seek(SeekFrom::Start(start_location + replace_len))?;
    std::io::copy(source, dest)?;

    Ok(())
}

/// Reports the total length of `reader` in bytes and leaves the stream
/// position where it was on entry.
///
/// # Errors
/// Returns any I/O error raised while querying or moving the stream position.
#[allow(dead_code)]
pub(crate) fn stream_len<R: Read + Seek + ?Sized>(reader: &mut R) -> Result<u64> {
    let saved_pos = reader.stream_position()?;
    let end_pos = reader.seek(SeekFrom::End(0))?;

    // Already sitting at the end: the measuring seek did not move us.
    if saved_pos == end_pos {
        return Ok(end_pos);
    }

    reader.seek(SeekFrom::Start(saved_pos))?;
    Ok(end_pos)
}

// `wasm32` variant: both arguments are ignored and the returned stream is an
// empty in-memory cursor over a `Vec<u8>`.
#[cfg(target_arch = "wasm32")]
pub(crate) fn stream_with_fs_fallback(
    _threshold_mb: usize,
    _expected_size_bytes: u64,
) -> Result<impl Read + Write + Seek> {
    let backing: Vec<u8> = Vec::new();
    Ok(std::io::Cursor::new(backing))
}

/// Creates a [`Read`]-, [`Write`]-, and [`Seek`]-capable stream that is held
/// in memory until it grows past a size threshold, after which it is backed
/// by a file on disk.
///
/// # Parameters
/// - `threshold_mb`: Size (in MB) of stream beyond which an on-disk stream will be used.
///   This threshold should be the one specified in settings under
///   `core.backing_store_memory_threshold_in_mb`.
/// - `expected_size_bytes`: Anticipated size (in bytes) of the stream. If this
///   exceeds the threshold, the stream is rolled over to disk before it is
///   returned.
///
/// # Errors
/// - Returns an error if rolling the stream over to disk fails.
///
/// # Note
/// This variant is compiled for every target except `wasm32`; the `wasm32`
/// variant of this function always returns an in-memory stream.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) fn stream_with_fs_fallback(
    threshold_mb: usize,
    expected_size_bytes: u64,
) -> Result<impl Read + Write + Seek> {
    const BYTES_PER_MB: usize = 1024 * 1024;

    // Saturate instead of overflowing for very large thresholds.
    let threshold_bytes = threshold_mb.saturating_mul(BYTES_PER_MB);
    let mut stream = SpooledTempFile::new(threshold_bytes);

    // Known to be too big for memory: go to disk right away.
    let exceeds_threshold = expected_size_bytes > threshold_bytes as u64;
    if exceeds_threshold {
        stream.roll()?;
    }

    Ok(stream)
}

/// [`Write`] adapter that collects output in a `Vec<u8>` and refuses to grow
/// past `max_len` bytes, preventing decompression bombs and similar
/// unbounded-output attacks.
pub(crate) struct BoundedVecWriter {
    /// Bytes accepted so far.
    inner: Vec<u8>,
    /// Upper bound, in bytes, on the length of `inner`.
    max_len: usize,
}

impl BoundedVecWriter {
    /// Creates an empty writer capped at `max_len` bytes.
    ///
    /// The backing buffer is obtained from `safe_vec(max_len, None)`; any
    /// error that call reports is returned unchanged.
    pub(crate) fn new(max_len: usize) -> Result<Self> {
        let inner: Vec<u8> = safe_vec(max_len as u64, None)?;
        Ok(Self { inner, max_len })
    }

    /// Consumes the writer and returns the bytes collected so far.
    pub(crate) fn into_inner(self) -> Vec<u8> {
        self.inner
    }
}

impl Write for BoundedVecWriter {
    /// Appends all of `buf`, or fails without storing anything when the
    /// result would be longer than `max_len`.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let projected_len = self.inner.len().saturating_add(buf.len());
        if projected_len <= self.max_len {
            self.inner.extend_from_slice(buf);
            return Ok(buf.len());
        }
        Err(std::io::Error::other("output exceeds maximum size"))
    }

    /// Nothing is buffered outside `inner`, so there is nothing to flush.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

// Returns a new Vec first making sure it can hold the desired capacity.  Fill
// with default value if provided
pub(crate) fn safe_vec<T: Clone>(item_cnt: u64, init_with: Option<T>) -> Result<Vec<T>> {
    let num_items = usize::try_from(item_cnt)?;

    // make sure we can allocate vec
    let mut output: Vec<T> = Vec::new();
    output
        .try_reserve_exact(num_items)
        .map_err(|_e| Error::InsufficientMemory)?;

    // fill if requested
    if let Some(i) = init_with {
        output.resize(num_items, i);
    }

    Ok(output)
}

pub trait ReaderUtils {
    // Reads contents from a stream making sure if can be done and will fit within available memory
    fn read_to_vec(&mut self, data_len: u64) -> Result<Vec<u8>>;
}

// Provide implementation for any object that support Read + Seek
impl<R: Read + Seek> ReaderUtils for R {
    fn read_to_vec(&mut self, data_len: u64) -> Result<Vec<u8>> {
        let old_pos = self.stream_position()?;
        let len = self.seek(SeekFrom::End(0))?;

        // reset seek pointer
        if old_pos != len {
            self.seek(SeekFrom::Start(old_pos))?;
        }

        if old_pos
            .checked_add(data_len)
            .ok_or(Error::BadParam("source stream read out of range".into()))?
            > len
        {
            return Err(Error::BadParam("read past end of source stream".into()));
        }

        // make sure we can allocate vec
        let mut output: Vec<u8> = safe_vec(data_len, None)?;

        self.take(data_len).read_to_end(&mut output)?;

        Ok(output)
    }
}

/// Creates a [`NamedTempFile`] whose file name starts with `prefix`.
///
/// Exactly one of the three `return` statements below is compiled in,
/// depending on the target:
/// - WASI with `target_env = "p1"`: no file is created and
///   [`Error::NotImplemented`] is returned.
/// - any other WASI target: the file is created in `/`.
/// - every non-WASI target: the file is created with `Builder::tempfile`,
///   i.e. wherever the `tempfile` crate places files by default.
///
/// In the two file-creating cases the builder is configured with
/// `rand_bytes(5)`, and an I/O failure is returned as [`Error::IoError`].
pub(crate) fn tempfile_builder<T: AsRef<OsStr> + Sized>(prefix: T) -> Result<NamedTempFile> {
    // WASI preview 1: unsupported.
    #[cfg(all(target_os = "wasi", target_env = "p1"))]
    return Err(Error::NotImplemented(
        "tempfile_builder requires wasip2 or later".to_string(),
    ));

    // Other WASI targets: create the file in the root directory.
    #[cfg(all(target_os = "wasi", not(target_env = "p1")))]
    return Builder::new()
        .prefix(&prefix)
        .rand_bytes(5)
        .tempfile_in("/")
        .map_err(Error::IoError);

    // Everything else: let the builder pick the location.
    #[cfg(not(target_os = "wasi"))]
    return Builder::new()
        .prefix(&prefix)
        .rand_bytes(5)
        .tempfile()
        .map_err(Error::IoError);
}

/// Creates a new temporary directory.
///
/// On WASI targets the directory is created under `/`; on every other target
/// it is created by `tempfile::tempdir()`. An I/O failure is returned as
/// [`Error::IoError`].
#[allow(dead_code)] // used in tests
pub(crate) fn tempdirectory() -> Result<TempDir> {
    // WASI: create the directory in the root directory.
    #[cfg(target_os = "wasi")]
    return TempDir::new_in("/").map_err(Error::IoError);

    // Everything else: use the crate's default location.
    #[cfg(not(target_os = "wasi"))]
    return tempdir().map_err(Error::IoError);
}

/// Convert a URI to a file path using PathBuf for better path handling.
#[cfg(feature = "file_io")]
pub fn uri_to_path(uri: &str, manifest_label: Option<&str>) -> PathBuf {
    // Contract:
    // - every `:` in `uri` becomes `_`;
    // - a URI without the `self#jumbf=` prefix is returned as-is (after the
    //   `:` replacement) and `manifest_label` is ignored;
    // - otherwise the prefix is dropped, and then either a leading `/c2pa/`
    //   is removed, or (when a label is given) the label, with its own `:`
    //   replaced by `_`, is put in front of the remainder.
    // Note: `PathBuf::push` replaces the existing path when the pushed path
    // is absolute, so a remainder that starts with `/` but is not under
    // `/c2pa/` replaces the label instead of being appended to it.
    let sanitized = uri.replace(':', "_");

    let Some(jumbf_part) = sanitized.strip_prefix("self#jumbf=") else {
        return PathBuf::from(sanitized);
    };
    let jumbf_path = PathBuf::from(jumbf_part);

    if let Ok(relative) = jumbf_path.strip_prefix("/c2pa/") {
        return relative.to_path_buf();
    }

    match manifest_label {
        Some(label) => {
            let mut labelled = PathBuf::from(label.replace(':', "_"));
            labelled.push(jumbf_path);
            labelled
        }
        None => jumbf_path,
    }
}

#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used)]
    #![allow(clippy::unwrap_used)]

    use std::io::Cursor;

    #[cfg(feature = "file_io")]
    #[test]
    fn test_uri_to_path() {
        // Each case is (input, manifest label, expected path).
        let cases: [(&str, Option<&str>, &str); 5] = [
            // Full JUMBF URI under /c2pa/: prefix and /c2pa/ are stripped.
            (
                "self#jumbf=/c2pa/urn:uuid:b3386820-9994-4b58-926f-1c47b82504c4:contentauth/c2pa.assertions/c2pa.thumbnail.claim.jpeg",
                None,
                "urn_uuid_b3386820-9994-4b58-926f-1c47b82504c4_contentauth/c2pa.assertions/c2pa.thumbnail.claim.jpeg",
            ),
            // An already-converted path passes through unchanged.
            (
                "urn_uuid_b3386820-9994-4b58-926f-1c47b82504c4_contentauth/c2pa.assertions/c2pa.thumbnail.claim.jpeg",
                None,
                "urn_uuid_b3386820-9994-4b58-926f-1c47b82504c4_contentauth/c2pa.assertions/c2pa.thumbnail.claim.jpeg",
            ),
            // Relative JUMBF URI: the manifest label is prepended.
            (
                "self#jumbf=c2pa.assertions/c2pa.thumbnail.claim",
                Some("test"),
                "test/c2pa.assertions/c2pa.thumbnail.claim",
            ),
            // Already-prefixed path with a label passes through unchanged.
            (
                "test/c2pa.assertions/c2pa.thumbnail.claim",
                Some("test"),
                "test/c2pa.assertions/c2pa.thumbnail.claim",
            ),
            // Test manifest label with colon replacement
            (
                "self#jumbf=c2pa.assertions/c2pa.thumbnail.claim",
                Some("urn:uuid:test:label"),
                "urn_uuid_test_label/c2pa.assertions/c2pa.thumbnail.claim",
            ),
        ];

        for (input, label, expected) in cases {
            assert_eq!(uri_to_path(input, label), PathBuf::from(expected));
        }
    }

    //use env_logger;
    use super::*;
    #[test]
    fn test_patch_stream() {
        let source = "this is a very very good test";

        // Runs `patch_stream` over a fresh cursor on `source` and hands back
        // whatever was written to the destination.
        let patch = |start: u64, len: u64, data: &[u8]| {
            let mut output: Vec<u8> = Vec::new();
            patch_stream(
                &mut Cursor::new(source.as_bytes()),
                &mut output,
                start,
                len,
                data,
            )
            .map(|()| output)
        };

        // Each case is (start_location, replace_len, replacement, expected output).
        let cases: [(u64, u64, &str, &str); 5] = [
            // test truncation
            (10, 5, "", "this is a very good test"),
            // test truncation with new data
            (10, 14, "so so", "this is a so so test"),
            // test insertion, leaving existing data
            (10, 0, "very ", "this is a very very very good test"),
            // test replacement of data
            (0, 29, "all new data", "all new data"),
            // test removal of all data
            (0, 29, "", ""),
        ];

        for (start, len, replacement, expected) in cases {
            let output = patch(start, len, replacement.as_bytes()).unwrap();
            assert_eq!(&output, expected.as_bytes());
        }

        // test replacement of too much data
        assert!(patch(10, 29, &[]).is_err());
    }
}