Skip to main content

lofty/ogg/
read.rs

1use super::tag::VorbisComments;
2use super::verify_signature;
3use crate::config::{ParseOptions, ParsingMode};
4use crate::error::{ErrorKind, LoftyError, Result};
5use crate::macros::{decode_err, err, parse_mode_choice};
6use crate::picture::{MimeType, Picture, PictureInformation, PictureType};
7use crate::tag::Accessor;
8use crate::util::text::{utf8_decode, utf8_decode_str, utf16_decode};
9
10use std::borrow::Cow;
11use std::io::{Read, Seek, SeekFrom};
12
13use byteorder::{LittleEndian, ReadBytesExt};
14use data_encoding::BASE64;
15use ogg_pager::{Packets, PageHeader};
16
17pub type OGGTags = (Option<VorbisComments>, PageHeader, Packets);
18
19pub(crate) fn read_comments<R>(
20	data: &mut R,
21	mut len: u64,
22	parse_options: ParseOptions,
23) -> Result<VorbisComments>
24where
25	R: Read,
26{
27	use crate::macros::try_vec;
28
29	let parse_mode = parse_options.parsing_mode;
30
31	let vendor_len = data.read_u32::<LittleEndian>()?;
32	if u64::from(vendor_len) > len {
33		err!(SizeMismatch);
34	}
35
36	let mut vendor_bytes = try_vec![0; vendor_len as usize];
37	data.read_exact(&mut vendor_bytes)?;
38
39	len -= u64::from(vendor_len);
40
41	let vendor;
42	match utf8_decode(vendor_bytes) {
43		Ok(v) => vendor = v,
44		Err(e) => {
45			// The actions following this are not spec-compliant in the slightest, so
46			// we need to short circuit if strict.
47			if parse_mode == ParsingMode::Strict {
48				return Err(e);
49			}
50
51			log::warn!("Possibly corrupt vendor string, attempting to recover");
52
53			// Some vendor strings have invalid mixed UTF-8 and UTF-16 encodings.
54			// This seems to work, while preserving the string opposed to using
55			// the replacement character
56			let LoftyError {
57				kind: ErrorKind::StringFromUtf8(e),
58			} = e
59			else {
60				return Err(e);
61			};
62			let s = e
63				.as_bytes()
64				.iter()
65				.map(|c| u16::from(*c))
66				.collect::<Vec<_>>();
67
68			match utf16_decode(&s) {
69				Ok(v) => {
70					log::warn!("Vendor string recovered as: '{v}'");
71					vendor = v;
72				},
73				Err(_) => decode_err!(@BAIL "OGG: File has an invalid vendor string"),
74			}
75		},
76	}
77
78	let number_of_items = data.read_u32::<LittleEndian>()?;
79	if number_of_items > (len >> 2) as u32 {
80		err!(SizeMismatch);
81	}
82
83	let mut tag = VorbisComments {
84		vendor,
85		items: Vec::with_capacity(number_of_items as usize),
86		pictures: Vec::new(),
87	};
88
89	for _ in 0..number_of_items {
90		let comment_len = data.read_u32::<LittleEndian>()?;
91		if u64::from(comment_len) > len {
92			err!(SizeMismatch);
93		}
94
95		let mut comment_bytes = try_vec![0; comment_len as usize];
96		data.read_exact(&mut comment_bytes)?;
97
98		len -= u64::from(comment_len);
99
100		// KEY=VALUE
101		let mut comment_split = comment_bytes.splitn(2, |b| *b == b'=');
102
103		let Some(key) = comment_split.next() else {
104			continue;
105		};
106
107		// Make sure there was a separator present, otherwise just move on
108		let Some(value) = comment_split.next() else {
109			log::warn!("No separator found in field, discarding");
110			continue;
111		};
112
113		match key {
114			k if k.eq_ignore_ascii_case(b"METADATA_BLOCK_PICTURE") => {
115				if !parse_options.read_cover_art {
116					continue;
117				}
118
119				match Picture::from_flac_bytes(value, true, parse_mode) {
120					Ok(picture) => tag.pictures.push(picture),
121					Err(e) => {
122						if parse_mode == ParsingMode::Strict {
123							return Err(e);
124						}
125
126						log::warn!("Failed to decode FLAC picture, discarding field");
127						continue;
128					},
129				}
130			},
131			k if k.eq_ignore_ascii_case(b"COVERART") => {
132				if !parse_options.read_cover_art {
133					continue;
134				}
135
136				// `COVERART` is an old deprecated image storage format. We have to convert it
137				// to a `METADATA_BLOCK_PICTURE` for it to be useful.
138				//
139				// <https://wiki.xiph.org/VorbisComment#Conversion_to_METADATA_BLOCK_PICTURE>
140				log::warn!(
141					"Found deprecated `COVERART` field, attempting to convert to \
142					 `METADATA_BLOCK_PICTURE`"
143				);
144
145				let picture_data = BASE64.decode(value);
146
147				match picture_data {
148					Ok(picture_data) => {
149						let mime_type = Picture::mimetype_from_bin(&picture_data)
150							.unwrap_or_else(|_| MimeType::Unknown(String::from("image/")));
151
152						let picture = Picture {
153							pic_type: PictureType::Other,
154							mime_type: Some(mime_type),
155							description: None,
156							data: Cow::from(picture_data),
157						};
158
159						tag.pictures.push((picture, PictureInformation::default()))
160					},
161					Err(_) => {
162						if parse_mode == ParsingMode::Strict {
163							return Err(LoftyError::new(ErrorKind::NotAPicture));
164						}
165
166						log::warn!("Failed to decode FLAC picture, discarding field");
167						continue;
168					},
169				}
170			},
171			// Support the case of TRACKNUMBER / DISCNUMBER being equal to current/total
172			k if (k.eq_ignore_ascii_case(b"TRACKNUMBER")
173				|| k.eq_ignore_ascii_case(b"DISCNUMBER")) =>
174			{
175				match utf8_decode_str(value) {
176					Ok(value) => {
177						let key = if k.eq_ignore_ascii_case(b"TRACKNUMBER") {
178							String::from("TRACKNUMBER")
179						} else {
180							String::from("DISCNUMBER")
181						};
182
183						if !parse_options.implicit_conversions {
184							tag.items.push((key, value.to_owned()));
185							continue;
186						}
187
188						// try to parse as current/total
189						let mut value_split = value.splitn(2, '/');
190						let current: Option<u32> = value_split.next().and_then(|b| b.parse().ok());
191						let total: Option<u32> = value_split.next().and_then(|b| b.parse().ok());
192
193						match key.as_str() {
194							"TRACKNUMBER" => {
195								if let Some(n) = total {
196									tag.set_track_total(n);
197								}
198								if let Some(n) = current {
199									tag.set_track(n);
200								} else {
201									// Probably some other format, like a vinyl track number (A1, B1, etc.).
202									// Just leave it up to the caller to deal with.
203									tag.items.push((key, value.to_owned()));
204								}
205							},
206							"DISCNUMBER" => {
207								if let Some(n) = total {
208									tag.set_disk_total(n);
209								}
210								if let Some(n) = current {
211									tag.set_disk(n);
212								} else {
213									// Probably some other format, like a vinyl track number (A1, B1, etc.).
214									// Just leave it up to the caller to deal with.
215									tag.items.push((key, value.to_owned()));
216								}
217							},
218							_ => {},
219						}
220					},
221					Err(e) => {
222						if parse_mode == ParsingMode::Strict {
223							return Err(e);
224						}
225
226						log::warn!("Non UTF-8 value found, discarding field {key:?}");
227						continue;
228					},
229				}
230			},
231			k if valid_vorbis_comments_key(k) => {
232				// SAFETY: We just verified that all of the bytes fall within the subset of ASCII
233				let key = unsafe { String::from_utf8_unchecked(k.to_vec()) };
234
235				match utf8_decode_str(value) {
236					Ok(value) => tag.items.push((key, value.to_owned())),
237					Err(e) => {
238						if parse_mode == ParsingMode::Strict {
239							return Err(e);
240						}
241
242						log::warn!("Non UTF-8 value found, discarding field {key:?}");
243						continue;
244					},
245				}
246			},
247			_ => {
248				parse_mode_choice!(
249					parse_mode,
250					STRICT: decode_err!(@BAIL "OGG: Vorbis comments contain an invalid key"),
251					// Otherwise discard invalid keys
252				)
253			},
254		}
255	}
256
257	Ok(tag)
258}
259
260pub(super) fn valid_vorbis_comments_key(key: &[u8]) -> bool {
261	// The valid range is 0x20..=0x7D not including 0x3D
262	key.iter().all(|c| (b' '..=b'}').contains(c) && *c != b'=')
263}
264
265pub(crate) fn read_from<T>(
266	data: &mut T,
267	header_sig: &[u8],
268	comment_sig: &[u8],
269	packets_to_read: isize,
270	parse_options: ParseOptions,
271) -> Result<OGGTags>
272where
273	T: Read + Seek,
274{
275	debug_assert!(packets_to_read >= 2);
276
277	// TODO: Would be nice if we didn't have to read just to seek and reread immediately
278	let start = data.stream_position()?;
279	let first_page_header = PageHeader::read(data)?;
280
281	data.seek(SeekFrom::Start(start))?;
282
283	// Read the header packets
284	let packets = Packets::read_count(data, packets_to_read)?;
285
286	let identification_packet = packets
287		.get(0)
288		.ok_or_else(|| decode_err!("OGG: Expected identification packet"))?;
289	verify_signature(identification_packet, header_sig)?;
290
291	if !parse_options.read_tags {
292		return Ok((None, first_page_header, packets));
293	}
294
295	let mut metadata_packet = packets
296		.get(1)
297		.ok_or_else(|| decode_err!("OGG: Expected comment packet"))?;
298	verify_signature(metadata_packet, comment_sig)?;
299
300	// Remove the signature from the packet
301	metadata_packet = &metadata_packet[comment_sig.len()..];
302
303	let reader = &mut metadata_packet;
304	let tag = read_comments(reader, reader.len() as u64, parse_options)?;
305
306	Ok((Some(tag), first_page_header, packets))
307}