lofty/id3/v2/items/
sync_text.rs

1use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
2use crate::id3::v2::{FrameFlags, FrameHeader, FrameId};
3use crate::macros::err;
4use crate::util::text::{
5	TextDecodeOptions, TextEncoding, decode_text, encode_text, read_to_terminator,
6	utf16_decode_bytes,
7};
8
9use std::borrow::Cow;
10use std::io::{Cursor, Read, Seek, SeekFrom, Write};
11
12use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
13
// Frame ID (`SYLT`) used for the header of every `SynchronizedTextFrame` built in this file
const FRAME_ID: FrameId<'static> = FrameId::Valid(Cow::Borrowed("SYLT"));
15
/// Unit in which the timestamps of a [`SynchronizedTextFrame`] are expressed
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum TimestampFormat {
	/// Timestamps count MPEG frames
	MPEG = 1,
	/// Timestamps count milliseconds
	MS = 2,
}

impl TimestampFormat {
	/// Convert a raw byte into a `TimestampFormat`
	///
	/// Only `1` ([`MPEG`](Self::MPEG)) and `2` ([`MS`](Self::MS)) are accepted, any other
	/// value yields `None`.
	pub fn from_u8(byte: u8) -> Option<Self> {
		[Self::MPEG, Self::MS]
			.iter()
			.copied()
			.find(|format| *format as u8 == byte)
	}
}
36
/// Kind of text carried by a [`SynchronizedTextFrame`]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
#[allow(missing_docs)]
pub enum SyncTextContentType {
	Other = 0,
	Lyrics = 1,
	TextTranscription = 2,
	PartName = 3,
	Events = 4,
	Chord = 5,
	Trivia = 6,
	WebpageURL = 7,
	ImageURL = 8,
}

impl SyncTextContentType {
	/// Convert a raw byte into a `SyncTextContentType`
	///
	/// Values `0` through `8` map onto the variants in declaration order, anything larger
	/// yields `None`.
	pub fn from_u8(byte: u8) -> Option<Self> {
		// The discriminants are contiguous and start at 0, so every variant sits at the
		// index equal to its own value.
		let variants = [
			Self::Other,
			Self::Lyrics,
			Self::TextTranscription,
			Self::PartName,
			Self::Events,
			Self::Chord,
			Self::Trivia,
			Self::WebpageURL,
			Self::ImageURL,
		];

		variants.get(usize::from(byte)).copied()
	}
}
70
71/// Represents an ID3v2 synchronized text frame
72#[derive(Clone, Debug, PartialEq, Eq, Hash)]
73pub struct SynchronizedTextFrame<'a> {
74	pub(crate) header: FrameHeader<'a>,
75	/// The text encoding (description/text)
76	pub encoding: TextEncoding,
77	/// ISO-639-2 language code (3 bytes)
78	pub language: [u8; 3],
79	/// The format of the timestamps
80	pub timestamp_format: TimestampFormat,
81	/// The type of content stored
82	pub content_type: SyncTextContentType,
83	/// Unique content description
84	pub description: Option<String>,
85	/// Collection of timestamps and text
86	pub content: Vec<(u32, String)>,
87}
88
89impl SynchronizedTextFrame<'_> {
90	/// Create a new [`SynchronizedTextFrame`]
91	pub fn new(
92		encoding: TextEncoding,
93		language: [u8; 3],
94		timestamp_format: TimestampFormat,
95		content_type: SyncTextContentType,
96		description: Option<String>,
97		content: Vec<(u32, String)>,
98	) -> Self {
99		let header = FrameHeader::new(FRAME_ID, FrameFlags::default());
100		Self {
101			header,
102			encoding,
103			language,
104			timestamp_format,
105			content_type,
106			description,
107			content,
108		}
109	}
110
111	/// Get the ID for the frame
112	pub fn id(&self) -> FrameId<'_> {
113		FRAME_ID
114	}
115
116	/// Get the flags for the frame
117	pub fn flags(&self) -> FrameFlags {
118		self.header.flags
119	}
120
121	/// Set the flags for the frame
122	pub fn set_flags(&mut self, flags: FrameFlags) {
123		self.header.flags = flags;
124	}
125
126	/// Read a [`SynchronizedTextFrame`] from a slice
127	///
128	/// NOTE: This expects the frame header to have already been skipped
129	///
130	/// # Errors
131	///
132	/// This function will return [`BadSyncText`][Id3v2ErrorKind::BadSyncText] if at any point it's unable to parse the data
133	#[allow(clippy::missing_panics_doc)] // Infallible
134	pub fn parse(data: &[u8], frame_flags: FrameFlags) -> Result<Self> {
135		if data.len() < 7 {
136			return Err(Id3v2Error::new(Id3v2ErrorKind::BadFrameLength).into());
137		}
138
139		let encoding = TextEncoding::from_u8(data[0])
140			.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
141		let language: [u8; 3] = data[1..4].try_into().unwrap();
142		if language.iter().any(|c| !c.is_ascii_alphabetic()) {
143			return Err(Id3v2Error::new(Id3v2ErrorKind::BadSyncText).into());
144		}
145		let timestamp_format = TimestampFormat::from_u8(data[4])
146			.ok_or_else(|| Id3v2Error::new(Id3v2ErrorKind::BadTimestampFormat))?;
147		let content_type = SyncTextContentType::from_u8(data[5])
148			.ok_or_else(|| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
149
150		let mut cursor = Cursor::new(&data[6..]);
151		let description = crate::util::text::decode_text(
152			&mut cursor,
153			TextDecodeOptions::new().encoding(encoding).terminated(true),
154		)
155		.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?
156		.text_or_none();
157
158		let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes;
159
160		// It's possible for the description to be the only string with a BOM
161		// To be safe, we change the encoding to the concrete variant determined from the description
162		if encoding == TextEncoding::UTF16 {
163			endianness = match cursor.get_ref()[..=1] {
164				[0xFF, 0xFE] => u16::from_le_bytes,
165				[0xFE, 0xFF] => u16::from_be_bytes,
166				// Since the description was already read, we can assume the BOM was valid
167				_ => unreachable!(),
168			};
169		}
170
171		let mut pos = 0;
172		let total = (data.len() - 6) as u64 - cursor.stream_position()?;
173
174		let mut content = Vec::new();
175
176		while pos < total {
177			let text = (|| -> Result<String> {
178				if encoding == TextEncoding::UTF16 {
179					// Check for a BOM
180					let mut bom = [0; 2];
181					cursor
182						.read_exact(&mut bom)
183						.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
184
185					cursor.seek(SeekFrom::Current(-2))?;
186
187					// Encountered text that doesn't include a BOM
188					if bom != [0xFF, 0xFE] && bom != [0xFE, 0xFF] {
189						let (raw_text, _) = read_to_terminator(&mut cursor, TextEncoding::UTF16);
190						return utf16_decode_bytes(&raw_text, endianness)
191							.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText).into());
192					}
193				}
194
195				let decoded_text = decode_text(
196					&mut cursor,
197					TextDecodeOptions::new().encoding(encoding).terminated(true),
198				)
199				.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
200				pos += decoded_text.bytes_read as u64;
201
202				Ok(decoded_text.content)
203			})()?;
204
205			let time = cursor
206				.read_u32::<BigEndian>()
207				.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
208			pos += 4;
209
210			content.push((time, text));
211		}
212
213		let header = FrameHeader::new(FRAME_ID, frame_flags);
214		Ok(Self {
215			header,
216			encoding,
217			language,
218			timestamp_format,
219			content_type,
220			description,
221			content,
222		})
223	}
224
225	/// Convert a [`SynchronizedTextFrame`] to an ID3v2 SYLT frame byte Vec
226	///
227	/// NOTE: This does not include the frame header
228	///
229	/// # Errors
230	///
231	/// * `content`'s length > [`u32::MAX`]
232	/// * `language` is not exactly 3 bytes
233	/// * `language` contains invalid characters (Only `'a'..='z'` and `'A'..='Z'` allowed)
234	pub fn as_bytes(&self) -> Result<Vec<u8>> {
235		let mut data = vec![self.encoding as u8];
236
237		if self.language.len() == 3 && self.language.iter().all(u8::is_ascii_alphabetic) {
238			data.write_all(&self.language)?;
239			data.write_u8(self.timestamp_format as u8)?;
240			data.write_u8(self.content_type as u8)?;
241
242			if let Some(description) = &self.description {
243				data.write_all(&encode_text(description, self.encoding, true))?;
244			} else {
245				data.write_u8(0)?;
246			}
247
248			for (time, text) in &self.content {
249				data.write_all(&encode_text(text, self.encoding, true))?;
250				data.write_u32::<BigEndian>(*time)?;
251			}
252
253			if data.len() as u64 > u64::from(u32::MAX) {
254				err!(TooMuchData);
255			}
256
257			return Ok(data);
258		}
259
260		Err(Id3v2Error::new(Id3v2ErrorKind::BadSyncText).into())
261	}
262}
263
#[cfg(test)]
mod tests {
	use crate::id3::v2::{
		FrameFlags, FrameHeader, SyncTextContentType, SynchronizedTextFrame, TimestampFormat,
	};
	use crate::util::text::TextEncoding;

	// Test assets holding the same frame, encoded as Latin-1 and UTF-16 respectively
	const LATIN1_ASSET: &str = "tests/tags/assets/id3v2/test.sylt";
	const UTF16_ASSET: &str = "tests/tags/assets/id3v2/test_utf16.sylt";

	/// The frame both assets are expected to describe, tagged with the given `encoding`
	fn expected(encoding: TextEncoding) -> SynchronizedTextFrame<'static> {
		let lines = [
			(0, "\nLofty"),
			(10000, "\nIs"),
			(15000, "\nReading"),
			(30000, "\nThis"),
			(1_938_000, "\nCorrectly"),
		];

		SynchronizedTextFrame {
			header: FrameHeader::new(super::FRAME_ID, FrameFlags::default()),
			encoding,
			language: *b"eng",
			timestamp_format: TimestampFormat::MS,
			content_type: SyncTextContentType::Lyrics,
			description: Some(String::from("Test Sync Text")),
			content: lines
				.iter()
				.map(|&(time, text)| (time, String::from(text)))
				.collect(),
		}
	}

	/// Parse the asset at `path` and compare the result against the expected frame
	fn check_decode(path: &str, encoding: TextEncoding) {
		let raw = crate::tag::utils::test_utils::read_path(path);

		let frame = SynchronizedTextFrame::parse(&raw, FrameFlags::default()).unwrap();

		assert_eq!(frame, expected(encoding));
	}

	/// Encode the expected frame and compare the bytes against the asset at `path`
	fn check_encode(path: &str, encoding: TextEncoding) {
		let encoded = expected(encoding).as_bytes().unwrap();

		let expected_bytes = crate::tag::utils::test_utils::read_path(path);

		assert_eq!(encoded, expected_bytes);
	}

	#[test_log::test]
	fn sylt_decode() {
		check_decode(LATIN1_ASSET, TextEncoding::Latin1);
	}

	#[test_log::test]
	fn sylt_encode() {
		check_encode(LATIN1_ASSET, TextEncoding::Latin1);
	}

	#[test_log::test]
	fn sylt_decode_utf16() {
		check_decode(UTF16_ASSET, TextEncoding::UTF16);
	}

	#[test_log::test]
	fn sylt_encode_utf_16() {
		check_encode(UTF16_ASSET, TextEncoding::UTF16);
	}
}