1use std::io::{Read, Seek, SeekFrom};
8
9use crate::error::{ApeError, ApeResult};
10
/// Largest ID3v2 tag body, in bytes, that `read_id3v2` is willing to load (16 MiB).
///
/// A header declaring a bigger size is rejected before any buffer is allocated.
const MAX_TAG_SIZE: u32 = 16 << 20;
13
/// One raw ID3v2 frame: its identifier plus the undecoded frame body.
#[derive(Clone, Debug)]
pub struct Id3v2Frame {
    /// Four-character frame identifier, e.g. `TIT2` or `COMM`.
    pub id: String,
    /// Frame body exactly as stored in the tag (the 10-byte frame header is not included).
    pub data: Vec<u8>,
}
22
23#[derive(Debug, Clone)]
25pub struct Id3v2Tag {
26 pub version: (u8, u8),
28 pub frames: Vec<Id3v2Frame>,
30}
31
32impl Id3v2Tag {
33 fn text_frame(&self, id: &str) -> Option<String> {
35 self.frames
36 .iter()
37 .find(|f| f.id == id)
38 .and_then(|f| decode_text_frame(&f.data))
39 }
40
41 pub fn title(&self) -> Option<String> {
43 self.text_frame("TIT2")
44 }
45
46 pub fn artist(&self) -> Option<String> {
48 self.text_frame("TPE1")
49 }
50
51 pub fn album(&self) -> Option<String> {
53 self.text_frame("TALB")
54 }
55
56 pub fn year(&self) -> Option<String> {
58 self.text_frame("TDRC").or_else(|| self.text_frame("TYER"))
59 }
60
61 pub fn track(&self) -> Option<String> {
63 self.text_frame("TRCK")
64 }
65
66 pub fn genre(&self) -> Option<String> {
68 self.text_frame("TCON")
69 }
70
71 pub fn comment(&self) -> Option<String> {
73 self.frames
74 .iter()
75 .find(|f| f.id == "COMM")
76 .and_then(|f| decode_comment_frame(&f.data))
77 }
78}
79
80pub fn read_id3v2<R: Read + Seek>(reader: &mut R) -> ApeResult<Option<Id3v2Tag>> {
88 reader.seek(SeekFrom::Start(0))?;
89
90 let mut header = [0u8; 10];
92 if reader.read(&mut header)? < 10 {
93 return Ok(None);
94 }
95
96 if &header[0..3] != b"ID3" {
98 return Ok(None);
99 }
100
101 let major = header[3];
102 let revision = header[4];
103 let flags = header[5];
104
105 if major != 3 && major != 4 {
107 return Err(ApeError::InvalidFormat("unsupported ID3v2 version"));
108 }
109
110 if flags & 0x80 != 0 {
112 return Err(ApeError::InvalidFormat(
113 "ID3v2 unsynchronization is not supported",
114 ));
115 }
116
117 let size = decode_syncsafe(&header[6..10]);
118 if size > MAX_TAG_SIZE {
119 return Err(ApeError::InvalidFormat("ID3v2 tag too large"));
120 }
121
122 let mut tag_data = vec![0u8; size as usize];
124 let bytes_read = read_full(reader, &mut tag_data)?;
125 tag_data.truncate(bytes_read);
126
127 let mut offset = 0usize;
129 if flags & 0x40 != 0 {
130 if tag_data.len() < 4 {
131 return Ok(Some(Id3v2Tag {
132 version: (major, revision),
133 frames: Vec::new(),
134 }));
135 }
136 let ext_size = if major == 4 {
137 decode_syncsafe(&tag_data[0..4]) as usize
138 } else {
139 u32::from_be_bytes([tag_data[0], tag_data[1], tag_data[2], tag_data[3]]) as usize
140 };
141 offset = if major == 4 { ext_size } else { ext_size + 4 };
143 if offset > tag_data.len() {
144 offset = tag_data.len();
145 }
146 }
147
148 let frames = parse_frames(&tag_data[offset..], major)?;
150
151 Ok(Some(Id3v2Tag {
152 version: (major, revision),
153 frames,
154 }))
155}
156
/// Decodes the first four bytes of `b` as an ID3v2 syncsafe integer
/// (7 payload bits per byte, most significant byte first).
///
/// Bytes are not masked: a byte with its top bit set bleeds into the bits
/// contributed by the preceding byte.
///
/// # Panics
///
/// Panics if `b` holds fewer than four bytes.
fn decode_syncsafe(b: &[u8]) -> u32 {
    b[..4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 7) | u32::from(byte))
}
165
166fn read_full<R: Read>(reader: &mut R, buf: &mut [u8]) -> ApeResult<usize> {
168 let mut total = 0;
169 while total < buf.len() {
170 match reader.read(&mut buf[total..])? {
171 0 => break,
172 n => total += n,
173 }
174 }
175 Ok(total)
176}
177
178fn parse_frames(data: &[u8], major: u8) -> ApeResult<Vec<Id3v2Frame>> {
180 let mut frames = Vec::new();
181 let mut pos = 0;
182
183 loop {
184 if pos + 10 > data.len() {
186 break;
187 }
188
189 if data[pos] == 0x00 {
191 break;
192 }
193
194 let id_bytes = &data[pos..pos + 4];
195 if !id_bytes.iter().all(|&b| b.is_ascii_alphanumeric()) {
197 break;
198 }
199
200 let id = String::from_utf8_lossy(id_bytes).into_owned();
201
202 let frame_size = if major == 4 {
203 decode_syncsafe(&data[pos + 4..pos + 8])
204 } else {
205 u32::from_be_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
206 } as usize;
207
208 pos += 10;
210
211 if frame_size == 0 || pos + frame_size > data.len() {
212 break;
214 }
215
216 let frame_data = data[pos..pos + frame_size].to_vec();
217 pos += frame_size;
218
219 frames.push(Id3v2Frame {
220 id,
221 data: frame_data,
222 });
223 }
224
225 Ok(frames)
226}
227
228fn decode_text_frame(data: &[u8]) -> Option<String> {
231 if data.is_empty() {
232 return None;
233 }
234
235 let encoding = data[0];
236 let payload = &data[1..];
237
238 if payload.is_empty() {
239 return None;
240 }
241
242 let text = match encoding {
243 0 => decode_iso_8859_1(payload),
244 1 => decode_utf16_with_bom(payload),
245 2 => decode_utf16be(payload),
246 3 => decode_utf8(payload),
247 _ => return None,
248 };
249
250 let text = text.trim_end_matches('\0').to_string();
252 if text.is_empty() {
253 None
254 } else {
255 Some(text)
256 }
257}
258
259fn decode_comment_frame(data: &[u8]) -> Option<String> {
262 if data.len() < 5 {
263 return None;
264 }
265
266 let encoding = data[0];
267 let rest = &data[4..];
269
270 let (text_start, _) = match encoding {
274 0 | 3 => {
275 let nul = rest.iter().position(|&b| b == 0)?;
276 (nul + 1, &rest[..nul])
277 }
278 1 | 2 => {
279 let nul = find_double_nul(rest)?;
280 (nul + 2, &rest[..nul])
281 }
282 _ => return None,
283 };
284
285 if text_start >= rest.len() {
286 return None;
287 }
288
289 let payload = &rest[text_start..];
290 let text = match encoding {
291 0 => decode_iso_8859_1(payload),
292 1 => decode_utf16_with_bom(payload),
293 2 => decode_utf16be(payload),
294 3 => decode_utf8(payload),
295 _ => return None,
296 };
297
298 let text = text.trim_end_matches('\0').to_string();
299 if text.is_empty() {
300 None
301 } else {
302 Some(text)
303 }
304}
305
/// Finds the first 2-byte-aligned pair of zero bytes in `data`.
///
/// Only pairs starting at even offsets (0, 2, 4, ...) are examined, so a zero
/// pair straddling two code units is not a match. Returns the byte offset of
/// the pair, or `None` if there is none.
fn find_double_nul(data: &[u8]) -> Option<usize> {
    data.chunks_exact(2)
        .position(|unit| unit[0] == 0 && unit[1] == 0)
        .map(|unit_index| unit_index * 2)
}
317
/// Decodes ISO-8859-1 bytes: each byte maps to the Unicode scalar value with
/// the same number (U+0000..=U+00FF).
fn decode_iso_8859_1(data: &[u8]) -> String {
    let mut text = String::with_capacity(data.len());
    text.extend(data.iter().copied().map(char::from));
    text
}
323
/// Decodes UTF-8 bytes, substituting U+FFFD for invalid sequences.
fn decode_utf8(data: &[u8]) -> String {
    match std::str::from_utf8(data) {
        Ok(valid) => valid.to_owned(),
        // Fall back to lossy decoding only when the input is malformed.
        Err(_) => String::from_utf8_lossy(data).into_owned(),
    }
}
327
328fn decode_utf16_with_bom(data: &[u8]) -> String {
329 if data.len() < 2 {
330 return String::new();
331 }
332
333 let (big_endian, payload) = if data[0] == 0xFE && data[1] == 0xFF {
334 (true, &data[2..])
335 } else if data[0] == 0xFF && data[1] == 0xFE {
336 (false, &data[2..])
337 } else {
338 (false, data)
340 };
341
342 decode_utf16_raw(payload, big_endian)
343}
344
345fn decode_utf16be(data: &[u8]) -> String {
346 decode_utf16_raw(data, true)
347}
348
/// Decodes BOM-less UTF-16 bytes in the given byte order.
///
/// Bytes are consumed two at a time; a trailing odd byte is ignored. Unpaired
/// surrogates are replaced with U+FFFD.
fn decode_utf16_raw(data: &[u8], big_endian: bool) -> String {
    let to_unit: fn([u8; 2]) -> u16 = if big_endian {
        u16::from_be_bytes
    } else {
        u16::from_le_bytes
    };

    let units = data
        .chunks_exact(2)
        .map(|pair| to_unit([pair[0], pair[1]]));

    char::decode_utf16(units)
        .map(|decoded| decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
363
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Encodes `value` as a 4-byte syncsafe integer (7 payload bits per byte).
    fn encode_syncsafe(value: u32) -> [u8; 4] {
        [21u32, 14, 7, 0].map(|shift| ((value >> shift) & 0x7F) as u8)
    }

    /// Wraps `frames_data` in a 10-byte ID3v2 header (revision byte is always 0).
    fn build_id3v2_tag(major: u8, flags: u8, frames_data: &[u8]) -> Vec<u8> {
        let mut buf = Vec::with_capacity(10 + frames_data.len());
        buf.extend_from_slice(b"ID3");
        buf.extend_from_slice(&[major, 0, flags]);
        buf.extend_from_slice(&encode_syncsafe(frames_data.len() as u32));
        buf.extend_from_slice(frames_data);
        buf
    }

    /// Builds a frame from an id, pre-encoded size bytes, zeroed flags and a body.
    fn build_frame(id: &str, size: [u8; 4], body: &[u8]) -> Vec<u8> {
        let mut frame = Vec::with_capacity(10 + body.len());
        frame.extend_from_slice(id.as_bytes());
        frame.extend_from_slice(&size);
        frame.extend_from_slice(&[0x00, 0x00]);
        frame.extend_from_slice(body);
        frame
    }

    /// Text-frame body: encoding byte followed by the already-encoded text.
    fn text_body(encoding: u8, text: &[u8]) -> Vec<u8> {
        let mut body = Vec::with_capacity(1 + text.len());
        body.push(encoding);
        body.extend_from_slice(text);
        body
    }

    /// ID3v2.3 text frame (plain big-endian frame size).
    fn build_v23_text_frame(id: &str, encoding: u8, text: &[u8]) -> Vec<u8> {
        let body = text_body(encoding, text);
        build_frame(id, (body.len() as u32).to_be_bytes(), &body)
    }

    /// ID3v2.4 text frame (syncsafe frame size).
    fn build_v24_text_frame(id: &str, encoding: u8, text: &[u8]) -> Vec<u8> {
        let body = text_body(encoding, text);
        build_frame(id, encode_syncsafe(body.len() as u32), &body)
    }

    /// UTF-16 bytes for `text`, prefixed with the matching byte-order mark.
    fn utf16_with_bom(text: &str, big_endian: bool) -> Vec<u8> {
        let bom: [u8; 2] = if big_endian { [0xFE, 0xFF] } else { [0xFF, 0xFE] };
        let mut bytes = bom.to_vec();
        for unit in text.encode_utf16() {
            let pair = if big_endian {
                unit.to_be_bytes()
            } else {
                unit.to_le_bytes()
            };
            bytes.extend_from_slice(&pair);
        }
        bytes
    }

    /// Builds a tag around `frames_data` and parses it, panicking on failure.
    fn parse_tag(major: u8, flags: u8, frames_data: &[u8]) -> Id3v2Tag {
        let mut cursor = Cursor::new(build_id3v2_tag(major, flags, frames_data));
        read_id3v2(&mut cursor)
            .expect("tag should parse without error")
            .expect("stream should contain an ID3v2 tag")
    }

    #[test]
    fn test_parse_id3v23_iso8859() {
        let frames_data = [
            build_v23_text_frame("TIT2", 0, b"Hello World"),
            build_v23_text_frame("TPE1", 0, b"Test Artist"),
            build_v23_text_frame("TALB", 0, b"Test Album"),
            build_v23_text_frame("TYER", 0, b"2024"),
            build_v23_text_frame("TRCK", 0, b"7"),
            build_v23_text_frame("TCON", 0, b"Rock"),
        ]
        .concat();

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.version, (3, 0));
        assert_eq!(tag.frames.len(), 6);
        assert_eq!(tag.title().as_deref(), Some("Hello World"));
        assert_eq!(tag.artist().as_deref(), Some("Test Artist"));
        assert_eq!(tag.album().as_deref(), Some("Test Album"));
        assert_eq!(tag.year().as_deref(), Some("2024"));
        assert_eq!(tag.track().as_deref(), Some("7"));
        assert_eq!(tag.genre().as_deref(), Some("Rock"));
    }

    #[test]
    fn test_parse_id3v23_utf8() {
        let frames_data = build_v23_text_frame("TIT2", 3, "Caf\u{00e9} Music".as_bytes());

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.title().as_deref(), Some("Caf\u{00e9} Music"));
    }

    #[test]
    fn test_parse_id3v23_utf16_bom_le() {
        let frames_data = build_v23_text_frame("TIT2", 1, &utf16_with_bom("Hello", false));

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.title().as_deref(), Some("Hello"));
    }

    #[test]
    fn test_parse_id3v23_utf16_bom_be() {
        let frames_data = build_v23_text_frame("TIT2", 1, &utf16_with_bom("World", true));

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.title().as_deref(), Some("World"));
    }

    #[test]
    fn test_parse_id3v24_syncsafe_sizes() {
        let frames_data = [
            build_v24_text_frame("TIT2", 3, b"v2.4 Title"),
            build_v24_text_frame("TPE1", 3, b"v2.4 Artist"),
            build_v24_text_frame("TDRC", 3, b"2025"),
        ]
        .concat();

        let tag = parse_tag(4, 0, &frames_data);
        assert_eq!(tag.version, (4, 0));
        assert_eq!(tag.title().as_deref(), Some("v2.4 Title"));
        assert_eq!(tag.artist().as_deref(), Some("v2.4 Artist"));
        assert_eq!(tag.year().as_deref(), Some("2025"));
    }

    #[test]
    fn test_parse_id3v24_year_falls_back_to_tyer() {
        let frames_data = build_v24_text_frame("TYER", 3, b"1999");

        let tag = parse_tag(4, 0, &frames_data);
        assert_eq!(tag.year().as_deref(), Some("1999"));
    }

    #[test]
    fn test_no_id3_header_returns_none() {
        let mut cursor = Cursor::new(b"MAC \x00\x00\x00\x00some APE data".to_vec());
        assert!(read_id3v2(&mut cursor).unwrap().is_none());
    }

    #[test]
    fn test_empty_stream_returns_none() {
        let mut cursor = Cursor::new(Vec::new());
        assert!(read_id3v2(&mut cursor).unwrap().is_none());
    }

    #[test]
    fn test_truncated_header_returns_none() {
        let mut cursor = Cursor::new(b"ID3".to_vec());
        assert!(read_id3v2(&mut cursor).unwrap().is_none());
    }

    #[test]
    fn test_zero_length_text_frame() {
        // The frame body holds only the encoding byte: there is no text at all.
        let frames_data = build_v23_text_frame("TIT2", 0, b"");

        let tag = parse_tag(3, 0, &frames_data);
        assert!(tag.title().is_none());
    }

    #[test]
    fn test_invalid_encoding_byte() {
        let frames_data = build_v23_text_frame("TIT2", 99, b"Bad Encoding");

        let tag = parse_tag(3, 0, &frames_data);
        assert!(tag.title().is_none());
    }

    #[test]
    fn test_unsynchronization_rejected() {
        let frames_data = build_v23_text_frame("TIT2", 0, b"Test");
        let mut cursor = Cursor::new(build_id3v2_tag(3, 0x80, &frames_data));

        assert!(read_id3v2(&mut cursor).is_err());
    }

    #[test]
    fn test_unsupported_version_rejected() {
        let frames_data = build_v23_text_frame("TIT2", 0, b"Test");
        let mut cursor = Cursor::new(build_id3v2_tag(2, 0, &frames_data));

        assert!(read_id3v2(&mut cursor).is_err());
    }

    #[test]
    fn test_iso8859_high_bytes() {
        let frames_data = build_v23_text_frame("TIT2", 0, &[0xC9, 0x6C, 0xE8, 0x76, 0x65]);

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.title().as_deref(), Some("\u{00C9}l\u{00E8}ve"));
    }

    #[test]
    fn test_text_with_trailing_nul() {
        let frames_data = build_v23_text_frame("TIT2", 0, b"Trimmed\x00");

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.title().as_deref(), Some("Trimmed"));
    }

    #[test]
    fn test_multiple_frames_mixed_encodings() {
        let frames_data = [
            build_v23_text_frame("TIT2", 1, &utf16_with_bom("UTF-16 Title", false)),
            build_v23_text_frame("TPE1", 0, b"Latin1 Artist"),
            build_v23_text_frame("TALB", 3, "UTF-8 Album".as_bytes()),
        ]
        .concat();

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.title().as_deref(), Some("UTF-16 Title"));
        assert_eq!(tag.artist().as_deref(), Some("Latin1 Artist"));
        assert_eq!(tag.album().as_deref(), Some("UTF-8 Album"));
    }

    #[test]
    fn test_padding_after_frames() {
        let mut frames_data = build_v23_text_frame("TIT2", 0, b"Padded");
        frames_data.extend_from_slice(&[0u8; 64]);

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.frames.len(), 1);
        assert_eq!(tag.title().as_deref(), Some("Padded"));
    }

    #[test]
    fn test_comment_frame_latin1() {
        // Encoding 0, language "eng", empty description, then the comment text.
        let body = b"\x00eng\x00Nice";
        let frames_data = build_frame("COMM", (body.len() as u32).to_be_bytes(), body);

        let tag = parse_tag(3, 0, &frames_data);
        assert_eq!(tag.comment().as_deref(), Some("Nice"));
    }

    #[test]
    fn test_extended_header_skipped_v23() {
        // v2.3 extended header: 4-byte size (6, excluding itself) + 6 bytes of content.
        let mut frames_data = vec![0, 0, 0, 6, 0, 0, 0, 0, 0, 0];
        frames_data.extend_from_slice(&build_v23_text_frame("TIT2", 0, b"Ext"));

        let tag = parse_tag(3, 0x40, &frames_data);
        assert_eq!(tag.frames.len(), 1);
        assert_eq!(tag.title().as_deref(), Some("Ext"));
    }

    #[test]
    fn test_real_ape_fixtures_no_crash() {
        use std::fs::File;
        use std::io::BufReader;
        use std::path::PathBuf;

        let fixtures_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/ape");

        // Fixtures are optional: without the directory there is nothing to check.
        let Ok(entries) = std::fs::read_dir(&fixtures_dir) else {
            return;
        };

        for path in entries.flatten().map(|entry| entry.path()) {
            if !path.extension().is_some_and(|ext| ext == "ape") {
                continue;
            }
            let file = File::open(&path)
                .unwrap_or_else(|e| panic!("failed to open fixture {}: {e}", path.display()));
            // Any outcome is acceptable here; the test only guards against panics.
            let _ = read_id3v2(&mut BufReader::new(file));
        }
    }
}