lofty/ogg/
read.rs

1use super::tag::VorbisComments;
2use super::verify_signature;
3use crate::config::{ParseOptions, ParsingMode};
4use crate::error::{ErrorKind, LoftyError, Result};
5use crate::macros::{decode_err, err, parse_mode_choice};
6use crate::picture::{MimeType, Picture, PictureInformation, PictureType};
7use crate::tag::Accessor;
8use crate::util::text::{utf8_decode, utf8_decode_str, utf16_decode};
9
10use std::borrow::Cow;
11use std::io::{Read, Seek, SeekFrom};
12
13use byteorder::{LittleEndian, ReadBytesExt};
14use data_encoding::BASE64;
15use ogg_pager::{Packets, PageHeader};
16
/// The pieces produced by reading the header packets of an OGG stream.
///
/// In order:
/// 1. The parsed [`VorbisComments`], or `None` when tag reading was disabled
///    through the parse options.
/// 2. The [`PageHeader`] of the first page of the stream.
/// 3. The header [`Packets`] that were read (identification packet first,
///    comment packet second).
pub type OGGTags = (Option<VorbisComments>, PageHeader, Packets);
18
19pub(crate) fn read_comments<R>(
20	data: &mut R,
21	mut len: u64,
22	parse_options: ParseOptions,
23) -> Result<VorbisComments>
24where
25	R: Read,
26{
27	use crate::macros::try_vec;
28
29	let parse_mode = parse_options.parsing_mode;
30
31	let vendor_len = data.read_u32::<LittleEndian>()?;
32	if u64::from(vendor_len) > len {
33		err!(SizeMismatch);
34	}
35
36	let mut vendor_bytes = try_vec![0; vendor_len as usize];
37	data.read_exact(&mut vendor_bytes)?;
38
39	len -= u64::from(vendor_len);
40
41	let vendor;
42	match utf8_decode(vendor_bytes) {
43		Ok(v) => vendor = v,
44		Err(e) => {
45			// The actions following this are not spec-compliant in the slightest, so
46			// we need to short circuit if strict.
47			if parse_mode == ParsingMode::Strict {
48				return Err(e);
49			}
50
51			log::warn!("Possibly corrupt vendor string, attempting to recover");
52
53			// Some vendor strings have invalid mixed UTF-8 and UTF-16 encodings.
54			// This seems to work, while preserving the string opposed to using
55			// the replacement character
56			let LoftyError {
57				kind: ErrorKind::StringFromUtf8(e),
58			} = e
59			else {
60				return Err(e);
61			};
62			let s = e
63				.as_bytes()
64				.iter()
65				.map(|c| u16::from(*c))
66				.collect::<Vec<_>>();
67
68			match utf16_decode(&s) {
69				Ok(v) => {
70					log::warn!("Vendor string recovered as: '{v}'");
71					vendor = v;
72				},
73				Err(_) => decode_err!(@BAIL "OGG: File has an invalid vendor string"),
74			}
75		},
76	}
77
78	let number_of_items = data.read_u32::<LittleEndian>()?;
79	if number_of_items > (len >> 2) as u32 {
80		err!(SizeMismatch);
81	}
82
83	let mut tag = VorbisComments {
84		vendor,
85		items: Vec::with_capacity(number_of_items as usize),
86		pictures: Vec::new(),
87	};
88
89	for _ in 0..number_of_items {
90		let comment_len = data.read_u32::<LittleEndian>()?;
91		if u64::from(comment_len) > len {
92			err!(SizeMismatch);
93		}
94
95		let mut comment_bytes = try_vec![0; comment_len as usize];
96		data.read_exact(&mut comment_bytes)?;
97
98		len -= u64::from(comment_len);
99
100		// KEY=VALUE
101		let mut comment_split = comment_bytes.splitn(2, |b| *b == b'=');
102
103		let Some(key) = comment_split.next() else {
104			continue;
105		};
106
107		// Make sure there was a separator present, otherwise just move on
108		let Some(value) = comment_split.next() else {
109			log::warn!("No separator found in field, discarding");
110			continue;
111		};
112
113		match key {
114			k if k.eq_ignore_ascii_case(b"METADATA_BLOCK_PICTURE") => {
115				if !parse_options.read_cover_art {
116					continue;
117				}
118
119				match Picture::from_flac_bytes(value, true, parse_mode) {
120					Ok(picture) => tag.pictures.push(picture),
121					Err(e) => {
122						if parse_mode == ParsingMode::Strict {
123							return Err(e);
124						}
125
126						log::warn!("Failed to decode FLAC picture, discarding field");
127						continue;
128					},
129				}
130			},
131			k if k.eq_ignore_ascii_case(b"COVERART") => {
132				if !parse_options.read_cover_art {
133					continue;
134				}
135
136				// `COVERART` is an old deprecated image storage format. We have to convert it
137				// to a `METADATA_BLOCK_PICTURE` for it to be useful.
138				//
139				// <https://wiki.xiph.org/VorbisComment#Conversion_to_METADATA_BLOCK_PICTURE>
140				log::warn!(
141					"Found deprecated `COVERART` field, attempting to convert to \
142					 `METADATA_BLOCK_PICTURE`"
143				);
144
145				let picture_data = BASE64.decode(value);
146
147				match picture_data {
148					Ok(picture_data) => {
149						let mime_type = Picture::mimetype_from_bin(&picture_data)
150							.unwrap_or_else(|_| MimeType::Unknown(String::from("image/")));
151
152						let picture = Picture {
153							pic_type: PictureType::Other,
154							mime_type: Some(mime_type),
155							description: None,
156							data: Cow::from(picture_data),
157						};
158
159						tag.pictures.push((picture, PictureInformation::default()))
160					},
161					Err(_) => {
162						if parse_mode == ParsingMode::Strict {
163							return Err(LoftyError::new(ErrorKind::NotAPicture));
164						}
165
166						log::warn!("Failed to decode FLAC picture, discarding field");
167						continue;
168					},
169				}
170			},
171			// Support the case of TRACKNUMBER being equal to current/total
172			k if k.eq_ignore_ascii_case(b"TRACKNUMBER") => {
173				match utf8_decode_str(value) {
174					Ok(value) => {
175						// try to parse as current/total
176						let mut value_split = value.splitn(2, '/');
177						let track_number: Option<u32> =
178							value_split.next().and_then(|b| b.parse().ok());
179						let track_total: Option<u32> =
180							value_split.next().and_then(|b| b.parse().ok());
181
182						if let Some(n) = track_number {
183							tag.set_track(n);
184						} else {
185							// Probably some other format, like a vinyl track number (A1, B1, etc.).
186							// Just leave it up to the caller to deal with.
187							tag.items
188								.push((String::from("TRACKNUMBER"), value.to_owned()));
189						}
190						if let Some(n) = track_total {
191							tag.set_track_total(n);
192						}
193					},
194					Err(e) => {
195						if parse_mode == ParsingMode::Strict {
196							return Err(e);
197						}
198
199						log::warn!("Non UTF-8 value found, discarding field {key:?}");
200						continue;
201					},
202				}
203			},
204			// The valid range is 0x20..=0x7D not including 0x3D
205			k if k.iter().all(|c| (b' '..=b'}').contains(c) && *c != b'=') => {
206				// SAFETY: We just verified that all of the bytes fall within the subset of ASCII
207				let key = unsafe { String::from_utf8_unchecked(k.to_vec()) };
208
209				match utf8_decode_str(value) {
210					Ok(value) => tag.items.push((key, value.to_owned())),
211					Err(e) => {
212						if parse_mode == ParsingMode::Strict {
213							return Err(e);
214						}
215
216						log::warn!("Non UTF-8 value found, discarding field {key:?}");
217						continue;
218					},
219				}
220			},
221			_ => {
222				parse_mode_choice!(
223					parse_mode,
224					STRICT: decode_err!(@BAIL "OGG: Vorbis comments contain an invalid key"),
225					// Otherwise discard invalid keys
226				)
227			},
228		}
229	}
230
231	Ok(tag)
232}
233
234pub(crate) fn read_from<T>(
235	data: &mut T,
236	header_sig: &[u8],
237	comment_sig: &[u8],
238	packets_to_read: isize,
239	parse_options: ParseOptions,
240) -> Result<OGGTags>
241where
242	T: Read + Seek,
243{
244	debug_assert!(packets_to_read >= 2);
245
246	// TODO: Would be nice if we didn't have to read just to seek and reread immediately
247	let start = data.stream_position()?;
248	let first_page_header = PageHeader::read(data)?;
249
250	data.seek(SeekFrom::Start(start))?;
251
252	// Read the header packets
253	let packets = Packets::read_count(data, packets_to_read)?;
254
255	let identification_packet = packets
256		.get(0)
257		.ok_or_else(|| decode_err!("OGG: Expected identification packet"))?;
258	verify_signature(identification_packet, header_sig)?;
259
260	if !parse_options.read_tags {
261		return Ok((None, first_page_header, packets));
262	}
263
264	let mut metadata_packet = packets
265		.get(1)
266		.ok_or_else(|| decode_err!("OGG: Expected comment packet"))?;
267	verify_signature(metadata_packet, comment_sig)?;
268
269	// Remove the signature from the packet
270	metadata_packet = &metadata_packet[comment_sig.len()..];
271
272	let reader = &mut metadata_packet;
273	let tag = read_comments(reader, reader.len() as u64, parse_options)?;
274
275	Ok((Some(tag), first_page_header, packets))
276}