opusmeta/
lib.rs

1#![allow(clippy::module_name_repetitions)]
2
3//! opusmeta is a Rust crate for reading and writing metadata from opus files.
4//!
5//! See the `read_tags` example file for basic usage.
6//!
7//! Unlike the more structured ID3 format, the Opus spec does not mandate a set of tag names
8//! or formatting for values. However, a list of common tag names can be found
9//! [here](https://xiph.org/vorbis/doc/v-comment.html).
10//!
11//! For reading and writing picture data, opusmeta uses the
12//! [METADATA_BLOCK_PICTURE](https://wiki.xiph.org/VorbisComment#Cover_art) proposal, which is supported by common players like ffmpeg and vlc.
13
14pub mod iter;
15pub mod picture;
16mod utils;
17
18use iter::{CommentsIterator, PicturesIterator};
19use ogg::{PacketReader, PacketWriteEndInfo, PacketWriter};
20use picture::{Picture, PictureError, PictureType};
21use std::collections::HashMap;
22use std::fs::File;
23use std::fs::OpenOptions;
24use std::io::Cursor;
25use std::io::{Read, Seek, Write};
26use std::path::Path;
27use thiserror::Error;
28
29pub use utils::LowercaseString;
30
31/// Error type.
32///
33/// Encapsulates every error that could occur in the usage of this crate.
34#[derive(Error, Debug)]
35#[non_exhaustive]
36pub enum Error {
37    /// Failed to read an ogg packet, or the file is not an ogg file
38    #[error("{0}")]
39    ReadError(#[from] ogg::OggReadError),
40    /// The selected file is an ogg file, but not an opus file.
41    #[error("The selected file is not an opus file")]
42    NotOpus,
43    /// Expected a packet (for example, the comment header packet), but the stream ended early
44    #[error("Expected a packet but did not receive one")]
45    MissingPacket,
46    /// An error occured while trying to execute an io operation. If the underlying `ErrorKind` is a
47    /// [`ErrorKind::UnexpectedEof`](std::io::ErrorKind::UnexpectedEof), then it usually means that
48    /// a piece of data, either an ogg packet or an encoded image, was shorter than expected by the
49    /// spec.
50    #[error("The comment header was malformed: {0}")]
51    DataError(#[from] std::io::Error),
52    /// A comment was not in TAG=VALUE format. The offending line in the comment header is provided
53    /// for convenience.
54    #[error("Encountered a comment which was not in TAG=VALUE format.")]
55    MalformedComment(String),
56    /// Expected valid UTF-8 data as mandated by the spec, but did not receive it. The underlying
57    /// `FromUtf8Error` provides the offending bytes for conveniece.
58    #[error(
59        "Expected valid UTF-8, but did not receive it. See the contained FromUtf8Error for the offending bytes."
60    )]
61    UTFError(#[from] std::string::FromUtf8Error),
62    /// The content was too big for the opus spec (e.g. is more than [`u32::MAX`] bytes long). Since
63    /// [`u32::MAX`] bytes is almost 4.3 GB, this error should almost never occur.
64    #[error("The content was too big for the Opus spec")]
65    TooBigError,
66    /// An error occured while encoding or decoding a [`Picture`]. See [`PictureError`] for more info.
67    #[error("An error occured while encoding or decoding a picture: {0}")]
68    PictureError(#[from] PictureError),
69    /// Raised if the platform's `usize` is smaller than 32 bits. This error is raised because
70    /// the opus spec uses u32 for lengths, but Rust uses usize instead.
71    #[error("This crate expects `usize` to be at least 32 bits in size.")]
72    PlatformError(#[from] std::num::TryFromIntError),
73}
74
/// Convenience alias for [`std::result::Result`] with this crate's [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;

/// Comment key under which pictures are stored as base64-encoded strings.
///
/// It is spelled in lowercase because [`Tag::new`] lowercases every key before storing it.
const PICTURE_BLOCK_TAG: &str = "metadata_block_picture";
78
/// Stores Opus comments.
///
/// A `Tag` holds the vendor string plus every comment of the comment header, grouped by key.
/// It can be cloned and compared for equality, which is handy for snapshotting the metadata
/// before editing it or for asserting on it in tests.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Tag {
    /// The vendor string of the comment header.
    vendor: String,
    /// All comment values, grouped by key. [`Tag::new`] lowercases keys before inserting them,
    /// and one key may carry several values.
    comments: HashMap<String, Vec<String>>,
}
85
86impl Tag {
87    /// Create a new tag from a vendor string and a list of comments.
88    #[must_use]
89    pub fn new(vendor: String, comments: Vec<(String, String)>) -> Self {
90        let mut comments_map = HashMap::new();
91        for (mut key, value) in comments {
92            key.make_ascii_lowercase();
93            comments_map.entry(key).or_insert_with(Vec::new).push(value);
94        }
95
96        Self {
97            vendor,
98            comments: comments_map,
99        }
100    }
101
102    /// Add one entry.
103    pub fn add_one(&mut self, tag: LowercaseString, value: String) {
104        self.comments.entry(tag.0).or_default().push(value);
105    }
106
107    /// Add multiple entries.
108    pub fn add_many(&mut self, tag: LowercaseString, mut values: Vec<String>) {
109        self.comments
110            .entry(tag.0)
111            .and_modify(|v: &mut Vec<String>| v.append(&mut values))
112            .or_insert(values);
113    }
114
115    /// Get all entries for a particular key, or None if no occurrences of the key exist.
116    #[must_use]
117    pub fn get(&self, tag: &LowercaseString) -> Option<&Vec<String>> {
118        self.comments.get(tag.0.as_str())
119    }
120
121    /// Gets the first entry for a particular key, or None if no occurences of the key exist.
122    #[must_use]
123    pub fn get_one(&self, tag: &LowercaseString) -> Option<&String> {
124        self.comments.get(tag.0.as_str()).and_then(|v| v.first())
125    }
126
127    /// Remove all entries for a particular key. Optionally returns the removed values, if any.
128    pub fn remove_entries(&mut self, tag: &LowercaseString) -> Option<Vec<String>> {
129        self.comments.remove(tag.0.as_str())
130    }
131
132    /// Remove all entries for a particular key, inserting the given values instead.
133    pub fn set_entries(
134        &mut self,
135        tag: LowercaseString,
136        values: Vec<String>,
137    ) -> Option<Vec<String>> {
138        self.comments.insert(tag.0, values)
139    }
140
141    /// Gets the vendor string
142    #[must_use]
143    pub fn get_vendor(&self) -> &str {
144        &self.vendor
145    }
146
147    /// Sets the vendor string.
148    pub fn set_vendor(&mut self, new_vendor: String) {
149        self.vendor = new_vendor;
150    }
151
152    /// Add a picture. If a picture with the same `PictureType` already exists, it is removed first.
153    /// # Errors
154    /// This function will error if [`remove_picture_type`](Self::remove_picture_type) errors, or
155    /// if encoding the given data to Opus format or to base64 errors.
156    pub fn add_picture(&mut self, picture: &Picture) -> Result<()> {
157        let _ = self.remove_picture_type(picture.picture_type)?;
158        let data = picture.to_base64()?;
159        self.add_one(PICTURE_BLOCK_TAG.into(), data);
160        Ok(())
161    }
162
163    /// Removes a picture with the given picture type. Returns the removed picture for convenience.
164    /// # Errors
165    /// Although rare, this function can error if a picture with the given type is not found AND
166    /// the first picture in the set is not decoded properly.
167    pub fn remove_picture_type(&mut self, picture_type: PictureType) -> Result<Option<Picture>> {
168        let Some(pictures) = self.comments.get_mut(PICTURE_BLOCK_TAG) else {
169            return Ok(None);
170        };
171        let mut index_to_remove = 0;
172        for (index, data) in (*pictures).iter().enumerate() {
173            if let Ok(pic) = Picture::from_base64(data) {
174                if pic.picture_type == picture_type {
175                    index_to_remove = index;
176                }
177            }
178        }
179
180        Picture::from_base64(&pictures.remove(index_to_remove)).map(Some)
181    }
182
183    /// Gets a picture which has a certain picture type, or None if there are no pictures with that
184    /// type.
185    #[must_use]
186    pub fn get_picture_type(&self, picture_type: PictureType) -> Option<Picture> {
187        let pictures = self.comments.get(PICTURE_BLOCK_TAG)?;
188        for picture in pictures {
189            if let Ok(decoded) = Picture::from_base64(picture) {
190                if decoded.picture_type == picture_type {
191                    return Some(decoded);
192                }
193            }
194        }
195
196        None
197    }
198
199    /// Returns whether any pictures are stored within the opus file.
200    #[must_use]
201    pub fn has_pictures(&self) -> bool {
202        self.comments.contains_key(PICTURE_BLOCK_TAG)
203    }
204
205    /// Returns a Vec of all encoded pictures. This function will skip pictures that are encoded
206    /// improperly.
207    #[must_use]
208    pub fn pictures(&self) -> Vec<Picture> {
209        match self.iter_pictures() {
210            Some(iter) => iter.filter_map(Result::ok).collect(),
211            None => vec![],
212        }
213    }
214}
215
216impl Tag {
217    /// Read a `Tag` from a reader.
218    /// # Errors
219    /// This function can error if:
220    /// - The ogg stream is shorter than expected (e.g. doesn't include the first or second packets)
221    /// - The given reader is not an opus stream
222    /// - The comment header does not include the magic signature
223    /// - The comment header is shorter than mandated by the spec
224    /// - The platform's usize is not at least 32 bits long
225    /// - The spec mandates UTF-8, but the data is invalid unicode
226    /// - A comment line is not in TAG=VALUE format.
227    pub fn read_from<R: Read + Seek>(f_in: R) -> Result<Self> {
228        let mut reader = PacketReader::new(f_in);
229        let first_packet = reader.read_packet()?.ok_or(Error::MissingPacket)?;
230        if !first_packet.data.starts_with(b"OpusHead") {
231            return Err(Error::NotOpus);
232        }
233        let header_packet = reader.read_packet()?.ok_or(Error::MissingPacket)?;
234        let mut cursor = Cursor::new(header_packet.data);
235        cursor.seek_relative(8)?; // length of string "OpusTags"
236        let mut buffer = [0; 4];
237        cursor.read_exact(&mut buffer)?;
238        // only panics on platforms where usize < 32 bits
239        let vendor_length: usize = u32::from_le_bytes(buffer).try_into()?;
240        let mut buffer = vec![0; vendor_length];
241        cursor.read_exact(&mut buffer)?;
242        let vendor = String::from_utf8(buffer)?;
243        let mut buffer = [0; 4];
244        cursor.read_exact(&mut buffer)?;
245        let comment_count = u32::from_le_bytes(buffer);
246        let mut comments: Vec<(String, String)> = Vec::new();
247        for _ in 0..comment_count {
248            let mut buffer = [0; 4];
249            cursor.read_exact(&mut buffer)?;
250            // only panics on platforms where usize < 32 bits
251            let comment_length: usize = u32::from_le_bytes(buffer).try_into()?;
252            let mut buffer = vec![0; comment_length];
253            cursor.read_exact(&mut buffer)?;
254            let comment = String::from_utf8(buffer.clone())?;
255            let pair = comment
256                .split_once('=')
257                .map(|(tag, value)| (tag.to_string(), value.to_string()))
258                .ok_or(Error::MalformedComment(comment))?;
259            comments.push(pair);
260        }
261        Ok(Self::new(vendor, comments))
262    }
263
264    /// Convenience function for reading comments from a path.
265    /// # Errors
266    /// This function will error for the same reasons as [`read_from`](Self::read_from)
267    pub fn read_from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
268        let file = File::open(path)?;
269        Self::read_from(file)
270    }
271
272    /// Writes tags to a writer. This function expects the writer to already contain an existing
273    /// opus stream. This function reads the existing stream, copies it **into memory**, replaces the
274    /// comment header, and dumps the whole stream back into the file.
275    /// # Errors
276    /// This function will error if:
277    /// - No opus stream exists in the target
278    /// - The ogg stream is shorter than expected (e.g. doesn't include the first or second packets)
279    /// - A comment in this Tag object is too big for the opus spec (some string is longer than [`u32::MAX`] bytes,
280    ///   or the object contains more than [`u32::MAX`] comments)
281    /// - An unspecified error occurs while reading ogg packets from the target
282    /// - An error occurs while writing an ogg packet to the target
283    /// - An error occurs while seeking through the target
284    /// - An error occurs while copying the finished ogg stream from memory back to the target
285    pub fn write_to<W: Read + Write + Seek>(&self, mut f_in: W) -> Result<()> {
286        let f_out_raw: Vec<u8> = vec![];
287        let mut cursor = Cursor::new(f_out_raw);
288
289        let mut reader = PacketReader::new(&mut f_in);
290        let mut writer = PacketWriter::new(&mut cursor);
291
292        // first packet
293        {
294            let first_packet = reader.read_packet()?.ok_or(Error::MissingPacket)?;
295            writer.write_packet(
296                first_packet.data.clone(),
297                first_packet.stream_serial(),
298                get_end_info(&first_packet),
299                first_packet.absgp_page(),
300            )?;
301        }
302
303        // second packet, which is the comment header
304        {
305            let comment_header_packet = reader.read_packet()?.ok_or(Error::MissingPacket)?;
306            let new_pack_data = self.to_packet_data()?;
307            writer.write_packet(
308                new_pack_data,
309                comment_header_packet.stream_serial(),
310                PacketWriteEndInfo::EndPage,
311                comment_header_packet.absgp_page(),
312            )?;
313        }
314
315        while let Some(packet) = reader.read_packet()? {
316            let stream_serial = packet.stream_serial();
317            let end_info = get_end_info(&packet);
318            let absgp_page = packet.absgp_page();
319            writer.write_packet(packet.data, stream_serial, end_info, absgp_page)?;
320        }
321        // stream ended
322
323        drop(reader);
324        cursor.seek(std::io::SeekFrom::Start(0))?;
325        f_in.seek(std::io::SeekFrom::Start(0))?;
326        std::io::copy(&mut cursor, &mut f_in)?;
327
328        Ok(())
329    }
330
331    /// Convenience function for writing to a path.
332    /// # Errors
333    /// This function will error for the same reasons as [`write_to`](Self::write_to)
334    pub fn write_to_path<P: AsRef<Path>>(&self, path: P) -> Result<()> {
335        let file = OpenOptions::new().read(true).write(true).open(path)?;
336        self.write_to(file)
337    }
338
339    fn to_packet_data(&self) -> Result<Vec<u8>> {
340        let mut output = vec![];
341        // magic signature
342        output.extend_from_slice(b"OpusTags");
343
344        // encode vendor
345        let vendor = &self.vendor;
346        let vendor_length: u32 = vendor.len().try_into().map_err(|_| Error::TooBigError)?;
347        output.extend_from_slice(&vendor_length.to_le_bytes());
348        output.extend_from_slice(vendor.as_bytes());
349
350        let mut formatted_tags = vec![];
351        for (tag, values) in &self.comments {
352            for value in values {
353                formatted_tags.push(format!("{tag}={value}"));
354            }
355        }
356
357        let num_comments: u32 = formatted_tags
358            .len()
359            .try_into()
360            .map_err(|_| Error::TooBigError)?;
361        output.extend_from_slice(&num_comments.to_le_bytes());
362
363        for tag in formatted_tags {
364            let tag_length: u32 = tag.len().try_into().map_err(|_| Error::TooBigError)?;
365            output.extend_from_slice(&tag_length.to_le_bytes());
366            output.extend_from_slice(tag.as_bytes());
367        }
368
369        Ok(output)
370    }
371}
372
373impl Tag {
374    /// An iterator over the comments of an opus file, excluding pictures.
375    ///
376    /// See [`CommentsIterator`] for more info.
377    #[must_use]
378    pub fn iter_comments(&self) -> CommentsIterator {
379        CommentsIterator {
380            comments_iter: self.comments.iter().filter(|c| c.0 != PICTURE_BLOCK_TAG),
381        }
382    }
383
384    /// An iterator over the images embedded in an opus file.
385    ///
386    /// See [`PicturesIterator`] for more info.
387    #[must_use]
388    pub fn iter_pictures(&self) -> Option<PicturesIterator> {
389        self.comments
390            .get(PICTURE_BLOCK_TAG)
391            .map(|pict_vec| PicturesIterator {
392                pictures_iter: pict_vec.iter(),
393            })
394    }
395
396    /// An iterator over the comment keys of an opus file, excluding the picture block key.
397    ///
398    /// The iterator Item is `&'a str`.
399    /// This iterator immutably borrows the tags stored in the [`Tag`] struct.
400    /// To check whether the set of tags contains pictures, see [`has_pictures`](Tag::has_pictures).
401    pub fn keys(&self) -> impl Iterator<Item = &str> {
402        self.comments
403            .keys()
404            .filter(|k| *k != PICTURE_BLOCK_TAG)
405            .map(AsRef::as_ref)
406    }
407}
408
409fn get_end_info(packet: &ogg::Packet) -> PacketWriteEndInfo {
410    if packet.last_in_stream() {
411        PacketWriteEndInfo::EndStream
412    } else if packet.last_in_page() {
413        PacketWriteEndInfo::EndPage
414    } else {
415        PacketWriteEndInfo::NormalPacket
416    }
417}