raves_metadata 0.0.4

A library to parse metadata from media files
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
//! # `raves_metadata`
//!
//! A library to parse and handle metadata from a variety of media file formats.
//!
//! ## Progress and Features
//!
//! This library is currently in its early stages. I'll document progress and features when that's necessary.
//!
//! <!--- TODO: see above. -->
//!
//! ## Contributing
//!
//! Contributions are welcome! Please submit PRs or issues at your leisure.
//!
//! ## License
//!
//! This project is dual-licensed under either the Apache License 2.0 or the MIT License at your option.
//!
//! For more information, please see the [`LICENSE-APACHE`](LICENSE-APACHE) and [`LICENSE-MIT`](LICENSE-MIT) files at the root of this repository.
//!
//! ## Why this project?
//!
//! I was making a gallery app for Android [called Raves](https://github.com/raves-project/raves)! However, I was having a lot of trouble finding a suitable library that did metadata parsing and editing.
//!
//! ### Oh, dang! So, why not use Exiv2?
//!
//! Exiv2 is [a great project](https://exiv2.org/) with a wonderful community! However, when trying to use it for my project, I faced some challenges. It is...
//!
//! - released under a copyleft license
//!   - ...resulting in it being less accessible for those using permissive licenses
//!   - and challenging to get working on Android (...as a dylib)
//! - written in C++
//!   - ...meaning it lacks C bindings with a proper API, so it's hard to use in Rust
//! - [not particularly portable](https://github.com/Exiv2/exiv2/issues/3040)
//!   - ...which is probably my fault, but it still scares me
//!
//! For people who don't have specific requirements, Exiv2 is an incredible option. However, it just wouldn't work for me, no matter how hard I tried.

#![forbid(unsafe_code)]

use std::sync::Arc;

use parking_lot::RwLock;

use crate::{
    exif::{Exif, error::ExifFatalError},
    iptc::{Iptc, error::IptcError},
    xmp::{Xmp, error::XmpError},
};

pub mod exif;
pub mod iptc;
pub mod magic_number;
pub mod providers;
pub mod xmp;

/// Attempts to parse the given file for any `MetadataProvider`, such as JPEG
/// or MP4.
///
/// This is a thin convenience wrapper: `input` is handed unchanged to
/// `magic_number::parse`, and whatever that function returns is returned
/// from here as-is.
///
/// # Arguments
///
/// - `input`: a reference to any value that can be viewed as a byte slice
///   (`AsRef<[u8]>`), holding the contents of the media file.
///
/// # Returns
///
/// - `Some(provider)`, wrapped in [`magic_number::AnyProvider`], when a
///   provider was produced for `input`.
/// - `None` otherwise. The exact conditions are decided by
///   `magic_number::parse`, not by this wrapper.
///
/// # Examples
///
/// ```
/// use raves_metadata::parse;
/// use raves_metadata::magic_number::AnyProvider;
///
/// // load and parse a file
/// # let file = include_bytes!("../assets/providers/avif/exif_xmp_after_image_blob.avif");
/// // let file = (...);
/// let maybe_provider: Option<AnyProvider> = parse(&file);
///
/// // grab its XMP metadata, for example:
/// if let Some(ref parsed) = maybe_provider {
///     let _xmp = parsed.xmp();
///
///     // use the XMP!
///     // ...
/// }
///
/// // you can also unwrap the inner type, if you want that
/// if let Some(AnyProvider::Avif(ref _avif)) = maybe_provider {
///     // use `Avif` object directly!
///     // ...
/// }
/// ```
#[inline(always)]
pub fn parse(input: &impl AsRef<[u8]>) -> Option<magic_number::AnyProvider> {
    magic_number::parse(input)
}

/// Checks the file type of the given file.
///
/// This is a thin convenience wrapper: `input` is handed unchanged to
/// `magic_number::get`, and whatever that function returns is returned from
/// here as-is. Unlike [`parse`], the result is only the detected
/// [`magic_number::MagicNumber`], not a parsed provider.
///
/// # Arguments
///
/// - `input`: a reference to any value that can be viewed as a byte slice
///   (`AsRef<[u8]>`), holding the contents of the media file.
///
/// # Returns
///
/// - `Some(magic_number)` when a file type was recognized.
/// - `None` otherwise. The exact conditions are decided by
///   `magic_number::get`, not by this wrapper.
///
/// # Examples
///
/// ```
/// use raves_metadata::get;
/// use raves_metadata::magic_number::MagicNumber;
///
/// // load a file and try to find its "magic number" (file type)
/// # let file = include_bytes!("../assets/providers/avif/exif_xmp_after_image_blob.avif");
/// // let file = (...);
/// let maybe_magic_number: Option<MagicNumber> = get(&file);
///
/// assert_eq!(maybe_magic_number, Some(MagicNumber::Avif));
/// ```
#[inline(always)]
pub fn get(input: &impl AsRef<[u8]>) -> Option<magic_number::MagicNumber> {
    magic_number::get(input)
}

/// A media file format that exposes the metadata embedded in it.
///
/// Every supported file format is a "provider". A provider is built from the
/// bytes of a media file with [`MetadataProvider::new`]; its metadata is then
/// obtained through [`MetadataProvider::exif`], [`MetadataProvider::iptc`]
/// and [`MetadataProvider::xmp`].
///
/// The Exif and XMP accessors operate on the storage handed out by
/// [`MetadataProviderRaw`]: raw bytes found there are parsed on first access
/// and the parsed value is written back into that storage, so that later
/// calls can reuse it (provided the implementor hands out the same shared
/// storage on every call).
pub trait MetadataProvider:
    Clone
    + core::fmt::Debug
    + Sized
    + Send
    + Sync
    + MetadataProviderRaw
    + magic_number::_MagicNumberMarker
{
    /// The error type produced when [`MetadataProvider::new`] fails.
    type ConstructionError: Clone
        + core::fmt::Debug
        + PartialEq
        + PartialOrd
        + core::error::Error
        + Sized
        + Send
        + Sync;

    /// Builds a provider from the bytes of a media file.
    ///
    /// # Errors
    ///
    /// Returns the provider's [`MetadataProvider::ConstructionError`] when
    /// the provider cannot be constructed from `input`.
    fn new(input: &impl AsRef<[u8]>)
    -> Result<Self, <Self as MetadataProvider>::ConstructionError>;

    /// Returns this file's Exif metadata, parsing it on first use.
    ///
    /// The returned `Exif` struct will provide all IFDs in the metadata,
    /// meaning that separation is maintained.
    ///
    /// # Returns
    ///
    /// - `None` when [`MetadataProviderRaw::exif_raw`] holds nothing, i.e.
    ///   Exif isn't supported or the file has no Exif metadata.
    /// - `Some(Ok(_))` with a shared handle to the parsed Exif. Once parsed,
    ///   the value is stored and later calls hand out the same handle.
    ///
    /// # Errors
    ///
    /// Returns `Some(Err(_))` if the stored raw bytes fail to parse, e.g.
    /// because the metadata is malformed or corrupted. In that case the
    /// stored raw bytes are discarded, so later calls on the same storage
    /// report `None` rather than repeating the error.
    fn exif(&self) -> Option<Result<Arc<RwLock<Exif>>, ExifFatalError>> {
        // The stored state is inspected twice - once under the read lock and
        // once more under the write lock, as it may have changed in between.
        // Both inspections must treat "already parsed" and "nothing stored"
        // identically, hence these two local helpers.
        fn reuse_cached(
            cached: &Wrapped<Exif>,
        ) -> Option<Result<Arc<RwLock<Exif>>, ExifFatalError>> {
            log::trace!("Cached Exif found! Returning...");
            // only bumps the reference count
            Some(Ok(Arc::clone(&cached.0)))
        }
        fn nothing_stored<T>() -> Option<T> {
            log::trace!("No Exif is present in this struct. Returning early.");
            None
        }

        // pass 1: read-only. anything but unparsed bytes is answered here,
        // without ever asking for exclusive access.
        {
            let storage = self.exif_raw();
            let guard = storage.read();
            match &*guard {
                Some(MaybeParsed::Parsed(cached)) => return reuse_cached(cached),
                None => return nothing_stored(),

                // unparsed bytes: continue with pass 2
                Some(MaybeParsed::Raw(_)) => {}
            }

            // the read lock has to be gone before the write lock is requested
            drop(guard);
        }

        // pass 2: exclusive access. the state is looked at again, as it may
        // no longer be the unparsed bytes seen during pass 1.
        let storage = self.exif_raw();
        let mut guard = storage.write();
        let slot = &mut *guard;

        let outcome: Result<Exif, ExifFatalError> = match slot {
            Some(MaybeParsed::Parsed(cached)) => return reuse_cached(cached),
            None => return nothing_stored(),

            // still unparsed, so the parsing happens right here
            Some(MaybeParsed::Raw(bytes)) => Exif::new(&mut bytes.as_slice()),
        };

        match outcome {
            // parsing worked: store the result in place of the raw bytes and
            // hand out a handle to it
            Ok(parsed) => {
                let wrapped: Wrapped<Exif> = Wrapped(Arc::new(RwLock::new(parsed)));
                log::trace!("Completed Exif parsing! Cached internally.");

                // the slot is known to hold raw bytes at this point, so it
                // can be overwritten directly
                *slot = Some(MaybeParsed::Parsed(wrapped.clone()));
                Some(Ok(wrapped.0))
            }

            // parsing failed: report it, forget the raw bytes, and pass the
            // error on to the caller
            Err(e) => {
                log::error!("Failed to parse Exif! err: {e}");
                *slot = None;
                Some(Err(e))
            }
        }
    }

    /// Returns this file's IPTC metadata.
    ///
    /// IPTC parsing isn't implemented yet: this default implementation logs
    /// an error message and always returns `None`.
    ///
    /// The intended contract is that `None` means IPTC isn't supported or
    /// the file has no IPTC metadata, and that all IPTC blocks are combined
    /// into one list of `(key, value)` pairs.
    ///
    /// # Errors
    ///
    /// Intended to return an error if the file's metadata is malformed or
    /// corrupted. The default implementation never does.
    fn iptc(&self) -> Option<Result<Arc<RwLock<Iptc>>, IptcError>> {
        log::error!(
            "Attempted to parse for IPTC, but IPTC IIC isn't \
            implemented in this library yet. \
            Returning None..."
        );
        None
    }

    /// Returns this file's XMP metadata, parsing it on first use.
    ///
    /// # Returns
    ///
    /// - `None` when [`MetadataProviderRaw::xmp_raw`] holds nothing, i.e.
    ///   XMP isn't supported or the file has no XMP metadata.
    /// - `Some(Ok(_))` with a shared handle to the parsed XMP. Once parsed,
    ///   the value is stored and later calls hand out the same handle.
    ///
    /// # Errors
    ///
    /// Returns `Some(Err(_))` if the stored raw bytes are not valid UTF-8
    /// (`XmpError::NotUtf8`) or otherwise fail to parse, e.g. because the
    /// metadata is malformed or corrupted. In that case the stored raw bytes
    /// are discarded, so later calls on the same storage report `None`
    /// rather than repeating the error.
    fn xmp(&self) -> Option<Result<Arc<RwLock<Xmp>>, XmpError>> {
        // The stored state is inspected twice - once under the read lock and
        // once more under the write lock, as it may have changed in between.
        // Both inspections must treat "already parsed" and "nothing stored"
        // identically, hence these two local helpers.
        fn reuse_cached(cached: &Wrapped<Xmp>) -> Option<Result<Arc<RwLock<Xmp>>, XmpError>> {
            log::trace!("Cached XMP found! Returning...");
            // only bumps the reference count
            Some(Ok(Arc::clone(&cached.0)))
        }
        fn nothing_stored<T>() -> Option<T> {
            log::trace!("No XMP is present in this struct. Returning early.");
            None
        }

        // pass 1: read-only. anything but unparsed bytes is answered here,
        // without ever asking for exclusive access.
        {
            let storage = self.xmp_raw();
            let guard = storage.read();
            match &*guard {
                Some(MaybeParsed::Parsed(cached)) => return reuse_cached(cached),
                None => return nothing_stored(),

                // unparsed bytes: continue with pass 2
                Some(MaybeParsed::Raw(_)) => {}
            }

            // the read lock has to be gone before the write lock is requested
            drop(guard);
        }

        // pass 2: exclusive access. the state is looked at again, as it may
        // no longer be the unparsed bytes seen during pass 1.
        let storage = self.xmp_raw();
        let mut guard = storage.write();
        let slot = &mut *guard;

        let outcome: Result<Xmp, XmpError> = match slot {
            Some(MaybeParsed::Parsed(cached)) => return reuse_cached(cached),
            None => return nothing_stored(),

            // still unparsed: the bytes must be UTF-8 text before they can
            // be given to the XMP parser
            Some(MaybeParsed::Raw(bytes)) => match core::str::from_utf8(bytes) {
                Ok(text) => Xmp::new(text),
                Err(e) => {
                    log::error!("XMP was not in UTF-8 format! err: {e}");
                    Err(XmpError::NotUtf8)
                }
            },
        };

        match outcome {
            // parsing worked: store the result in place of the raw bytes and
            // hand out a handle to it
            Ok(parsed) => {
                let wrapped: Wrapped<Xmp> = Wrapped(Arc::new(RwLock::new(parsed)));
                log::trace!("Completed XMP parsing! Cached internally.");

                // the slot is known to hold raw bytes at this point, so it
                // can be overwritten directly
                *slot = Some(MaybeParsed::Parsed(wrapped.clone()));
                Some(Ok(wrapped.0))
            }

            // parsing failed: report it, forget the raw bytes, and pass the
            // error on to the caller
            Err(e) => {
                log::error!("Failed to parse XMP! err: {e}");
                *slot = None;
                Some(Err(e))
            }
        }
    }

    /// Tells whether `input` starts like a file of this provider's format.
    ///
    /// `input` only needs to be as long as the magic number -- there is no
    /// reason to pass an entire (possibly huge, memory-mapped) file.
    ///
    /// # Returns
    ///
    /// - `true` if `input` matches the expected magic number (signature).
    /// - Otherwise, `false`.
    ///
    /// A `true` result is not a guarantee: any arbitrary byte slice could
    /// happen to carry the expected signature. This method never panics,
    /// though.
    fn magic_number(input: &[u8]) -> bool;
}

/// Low-level storage access for [`MetadataProvider`] implementors.
///
/// These methods hand out the shared, lock-protected slots in which a
/// provider keeps each metadata standard - either as raw bytes or as an
/// already-parsed value (see [`MaybeParsed`]).
///
/// Most users won't need them. They are useful if you wish to modify the
/// stored metadata directly, or want to grab it in its raw form right away.
///
/// Both default implementations create a fresh, empty slot on every call,
/// which the [`MetadataProvider`] accessors report as "no metadata".
pub trait MetadataProviderRaw {
    /// Returns the `Option<MaybeParsedExif>` slot stored inside the provider.
    ///
    /// [`MetadataProvider::exif`] is implemented on top of this method, but
    /// it can also be used to work with the raw data exactly as-is.
    ///
    /// The default implementation returns a newly created slot holding
    /// `None`.
    fn exif_raw(&self) -> Arc<RwLock<Option<MaybeParsedExif>>> {
        let empty: RwLock<Option<MaybeParsedExif>> = RwLock::new(None);
        Arc::new(empty)
    }

    /// Returns the `Option<MaybeParsedXmp>` slot stored inside the provider.
    ///
    /// [`MetadataProvider::xmp`] is implemented on top of this method, but
    /// it can also be used to work with the raw data exactly as-is.
    ///
    /// The default implementation returns a newly created slot holding
    /// `None`.
    fn xmp_raw(&self) -> Arc<RwLock<Option<MaybeParsedXmp>>> {
        let empty: RwLock<Option<MaybeParsedXmp>> = RwLock::new(None);
        Arc::new(empty)
    }
}

/// Metadata in one of two states: still raw, or already parsed.
///
/// Keeping the parsed form around means a media file's metadata does not
/// have to be reprocessed every time one of the parse methods is called.
///
/// ## Generics
///
/// - `R`: the raw (unprocessed) representation
/// - `P`: the parsed representation
///
/// ## Why?
///
/// `MaybeParsed::Parsed` metadata can be edited! >:)
#[derive(Clone, Debug, PartialEq, PartialOrd, Hash)]
pub enum MaybeParsed<
    R: Clone + core::fmt::Debug + PartialEq + PartialOrd + core::hash::Hash,
    P: Clone + core::fmt::Debug + PartialEq + PartialOrd + core::hash::Hash,
> {
    /// Metadata exactly as it was found; no processing has happened yet.
    Raw(R),

    /// Metadata that has been parsed, held behind a shared [`Wrapped`]
    /// handle.
    Parsed(Wrapped<P>),
}

/// Exif metadata that is either still raw bytes (`Vec<u8>`) or already
/// parsed into an [`Exif`].
///
/// This is the value stored in the slot returned by
/// [`MetadataProviderRaw::exif_raw`].
pub type MaybeParsedExif = MaybeParsed<Vec<u8>, Exif>;
/// IPTC metadata that is either still raw bytes (`Vec<u8>`) or already
/// parsed into an [`Iptc`].
pub type MaybeParsedIptc = MaybeParsed<Vec<u8>, Iptc>;
/// XMP metadata that is either still raw bytes (`Vec<u8>`) or already
/// parsed into an [`Xmp`].
///
/// This is the value stored in the slot returned by
/// [`MetadataProviderRaw::xmp_raw`]. The raw bytes are decoded as UTF-8
/// when [`MetadataProvider::xmp`] parses them.
pub type MaybeParsedXmp = MaybeParsed<Vec<u8>, Xmp>;

/// A shared, lockable handle around a metadata standard type.
///
/// This newtype makes it easy to derive comparison and hashing traits on
/// [`MaybeParsed`] (and on providers that store one). It should never be
/// returned in non-raw interfaces.
///
/// Note that equality, ordering and hashing are all based on the *address*
/// of the shared allocation, not on the wrapped value: two handles are equal
/// exactly when they point at the same allocation. The lock is never taken
/// by any of these operations.
#[derive(Clone, Debug)]
pub struct Wrapped<P>(
    /// The wrapped value.
    ///
    /// This should be a standard, like [`crate::xmp::Xmp`].
    pub Arc<RwLock<P>>,
)
where
    P: PartialEq + PartialOrd + core::hash::Hash;

// hand-written (identity-based) trait impls, so that providers can simply
// derive these traits
impl<P> PartialEq for Wrapped<P>
where
    P: PartialEq + PartialOrd + core::hash::Hash,
{
    /// Two handles are equal when they share one allocation.
    fn eq(&self, other: &Self) -> bool {
        core::ptr::eq(Arc::as_ptr(&self.0), Arc::as_ptr(&other.0))
    }
}

impl<P> PartialOrd for Wrapped<P>
where
    P: PartialEq + PartialOrd + core::hash::Hash,
{
    /// Orders handles by the address of their allocation.
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        let lhs = Arc::as_ptr(&self.0);
        let rhs = Arc::as_ptr(&other.0);
        lhs.partial_cmp(&rhs)
    }
}

impl<P> core::hash::Hash for Wrapped<P>
where
    P: PartialEq + PartialOrd + core::hash::Hash,
{
    /// Hashes the address of the allocation (as a `usize`), which keeps the
    /// hash consistent with the identity-based equality above.
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let address = Arc::as_ptr(&self.0) as usize;
        core::hash::Hash::hash(&address, state);
    }
}

/// Crate-internal helpers.
pub(crate) mod util {
    /// Installs an `env_logger` logger configured for tests.
    ///
    /// The logger is put in test mode, lets through every log level, and
    /// includes the source file and line number in each record.
    ///
    /// The result of installing the logger is deliberately ignored, so a
    /// failed installation does not make the calling test fail.
    #[cfg(test)]
    pub fn logger() {
        let mut builder = env_logger::builder();
        builder
            .is_test(true)
            .filter_level(log::LevelFilter::max())
            .format_file(true)
            .format_line_number(true);

        // best effort: the outcome is intentionally discarded
        let _ = builder.try_init();
    }
}