Skip to main content

musefs_format/
mp3.rs

1use crate::convert::usize_from;
2use crate::error::{FormatError, Result};
3use crate::input::{
4    ArtInput, BinaryTagInput, EmbeddedBinaryTag, EmbeddedPicture, PictureType, TagInput,
5};
6use crate::layout::{RegionLayout, Segment};
7use crate::probe::Extent;
8use crate::size;
9
/// Where the MP3 audio frames begin and end (excluding any ID3v2 prefix and
/// ID3v1 trailer). Unlike FLAC there is no preserved structural metadata: the
/// ID3v2 tag is regenerated from the DB, and the Xing/LAME info frame lives
/// inside the first audio frame, carried by the backing-audio segment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Mp3Bounds {
    /// Byte offset of the first audio byte, measured from the start of the file.
    pub audio_offset: u64,
    /// Number of audio bytes that follow `audio_offset`.
    pub audio_length: u64,
}
19
/// Decode a 4-byte synchsafe integer: seven payload bits per byte, most
/// significant byte first. The high bit of every byte is masked off, not
/// validated. Panics if `b` holds fewer than four bytes; anything past the
/// fourth byte is ignored.
fn synchsafe_decode(b: &[u8]) -> u32 {
    b[..4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 7) | u32::from(byte & 0x7F))
}
26
27fn id3v2_header_len(data: &[u8]) -> Result<Option<usize>> {
28    if data.len() < 10 || &data[0..3] != b"ID3" {
29        return Ok(None);
30    }
31    if !matches!(data[3], 2..=4) {
32        return Err(FormatError::Malformed);
33    }
34    // A well-formed synchsafe size has the high bit clear in every byte; reject
35    // if any size byte has it set (the id3 crate may not mask those bits).
36    if data[6..10].iter().any(|&b| b & 0x80 != 0) {
37        return Err(FormatError::Malformed);
38    }
39    Ok(Some(10 + synchsafe_decode(&data[6..10]) as usize))
40}
41
42/// Locate the audio region: skip a leading ID3v2 tag (if present) and a trailing
43/// 128-byte ID3v1 tag (if present), then require an MPEG frame sync at the audio
44/// offset. The synthesized file re-prepends a fresh ID3v2 tag, so the original
45/// one is intentionally *not* preserved.
46pub fn locate_audio(data: &[u8]) -> Result<Mp3Bounds> {
47    let len = data.len();
48
49    let mut audio_offset = 0usize;
50    if let Some(base) = id3v2_header_len(data)? {
51        let flags = data[5];
52        let mut tag_len = base;
53        if flags & 0x10 != 0 {
54            tag_len += 10; // ID3v2.4 footer
55        }
56        if tag_len > len {
57            return Err(FormatError::Malformed);
58        }
59        audio_offset = tag_len;
60    }
61
62    let mut audio_end = len;
63    if audio_end >= audio_offset + 128 && &data[audio_end - 128..audio_end - 125] == b"TAG" {
64        audio_end -= 128; // strip ID3v1 trailer
65    }
66
67    // Require an MPEG audio frame sync (11 set bits) at the audio offset.
68    if audio_offset + 1 >= len
69        || data[audio_offset] != 0xFF
70        || (data[audio_offset + 1] & 0xE0) != 0xE0
71    {
72        return Err(FormatError::NotMp3);
73    }
74
75    Ok(Mp3Bounds {
76        audio_offset: audio_offset as u64,
77        audio_length: (audio_end - audio_offset) as u64,
78    })
79}
80
81/// Bounded twin of [`locate_audio`]. `prefix` is a front window; `file_len` is the
82/// true size; `tail` is the file's last 128 bytes (or `None` if the file is
83/// shorter than 128 bytes). The audio start is the end of any leading ID3v2 tag
84/// (declared in its 10-byte header); if that end is past the prefix, returns
85/// `NeedMore`. The audio end is `file_len` minus a 128-byte ID3v1 trailer when the
86/// `tail` begins with `TAG`.
87pub fn locate_audio_bounded(
88    prefix: &[u8],
89    file_len: u64,
90    tail: Option<&[u8; 128]>,
91) -> Result<Extent<Mp3Bounds>> {
92    let mut audio_offset = 0usize;
93    if prefix.len() < 10 && file_len >= 10 {
94        // Not enough bytes even to read the ID3v2 header.
95        return Ok(Extent::NeedMore { up_to: 10 });
96    }
97    if let Some(base) = id3v2_header_len(prefix)? {
98        let flags = prefix[5];
99        let mut tag_len = base;
100        if flags & 0x10 != 0 {
101            tag_len += 10; // ID3v2.4 footer
102        }
103        if tag_len as u64 > file_len {
104            return Err(FormatError::Malformed);
105        }
106        audio_offset = tag_len;
107    }
108
109    // The audio start (plus its 2-byte frame sync) must fit in the file. Mirrors
110    // the unbounded `locate_audio`'s `audio_offset + 1 >= len` reject: without
111    // this, a tag that claims audio begins at/after EOF would return `NeedMore`
112    // with `up_to > file_len`, and the caller would widen to the full file and
113    // get the same answer every retry instead of failing fast.
114    if audio_offset as u64 + 2 > file_len {
115        return Err(FormatError::NotMp3);
116    }
117
118    // Need the frame-sync pair at the audio offset to be inside the prefix.
119    if audio_offset + 2 > prefix.len() {
120        return Ok(Extent::NeedMore {
121            up_to: (audio_offset + 2) as u64,
122        });
123    }
124
125    if prefix[audio_offset] != 0xFF || (prefix[audio_offset + 1] & 0xE0) != 0xE0 {
126        return Err(FormatError::NotMp3);
127    }
128
129    let mut audio_end = file_len;
130    if let Some(tail) = tail
131        && file_len >= audio_offset as u64 + 128
132        && &tail[0..3] == b"TAG"
133    {
134        audio_end -= 128;
135    }
136
137    Ok(Extent::Complete(Mp3Bounds {
138        audio_offset: audio_offset as u64,
139        audio_length: audio_end - audio_offset as u64,
140    }))
141}
142
/// Text-encoding byte written first in the text-bearing frame bodies built in
/// this file; `0x03` is the ID3v2.4 code for UTF-8.
const ENC_UTF8: u8 = 0x03;
144
/// Encode `n` as a 4-byte syncsafe integer: seven bits per byte, most
/// significant group first, high bit of every byte clear. Only the low 28 bits
/// of `n` are representable; higher bits are discarded, so callers must
/// range-check beforehand.
fn syncsafe(n: u32) -> [u8; 4] {
    [21u32, 14, 7, 0].map(|shift| ((n >> shift) & 0x7F) as u8)
}
153
/// Inclusive maximum of a 28-bit ID3v2.4 syncsafe size field (four bytes of
/// seven payload bits each). Sizes above this cannot be encoded and are
/// rejected by the writers in this file.
const SYNCHSAFE_MAX: u32 = 0x0FFF_FFFF;
156
157fn push_frame_header(out: &mut Vec<u8>, id: &[u8; 4], data_len: usize) -> Result<()> {
158    // ID3v2.4 frame sizes are a 28-bit syncsafe field; guard so an oversized frame
159    // is a hard error rather than a silently-truncated (corrupt) tag.
160    let data_len_u32 = u32::try_from(data_len)
161        .ok()
162        .filter(|&v| v <= SYNCHSAFE_MAX)
163        .ok_or(FormatError::TooLarge)?;
164    out.extend_from_slice(id);
165    out.extend_from_slice(&syncsafe(data_len_u32));
166    out.extend_from_slice(&[0x00, 0x00]); // frame flags
167    Ok(())
168}
169
170fn text_frame_data(values: &[String]) -> Vec<u8> {
171    let mut d = vec![ENC_UTF8];
172    d.extend_from_slice(values.join("\0").as_bytes());
173    d
174}
175
176fn txxx_frame_data(desc: &str, value: &str) -> Vec<u8> {
177    let mut d = vec![ENC_UTF8];
178    d.extend_from_slice(desc.as_bytes());
179    d.push(0x00);
180    d.extend_from_slice(value.as_bytes());
181    d
182}
183
184/// COMM/USLT share a body layout: `[enc][lang(3)][descriptor NUL][text]`. We
185/// write UTF-8 with an unknown language (`XXX`) and empty descriptor; the
186/// original language code and descriptor are not preserved on round-trip.
187fn comm_like_frame_data(value: &str) -> Vec<u8> {
188    let mut d = vec![ENC_UTF8];
189    d.extend_from_slice(b"XXX"); // language: unknown
190    d.push(0x00); // empty content descriptor, NUL-terminated
191    d.extend_from_slice(value.as_bytes());
192    d
193}
194
/// Whether `key` has the shape of an ID3v2 text frame id: exactly four bytes,
/// a leading `T`, then three ASCII uppercase letters or digits. `TXXX` itself
/// is excluded. Used to round-trip unmapped standard text frames.
fn is_id3_text_frame_id(key: &str) -> bool {
    match key.as_bytes() {
        b"TXXX" => false,
        [b'T', rest @ ..] if rest.len() == 3 => rest
            .iter()
            .all(|byte| byte.is_ascii_uppercase() || byte.is_ascii_digit()),
        _ => false,
    }
}
205
206/// APIC frame data up to (but excluding) the image bytes:
207/// `[encoding][mime\0][picture type][description\0]`.
208fn apic_framing(art: &ArtInput) -> Vec<u8> {
209    let mut d = vec![ENC_UTF8];
210    d.extend_from_slice(art.mime.as_bytes());
211    d.push(0x00);
212    #[expect(
213        clippy::cast_possible_truncation,
214        reason = "ID3 APIC type is one byte; valid picture types are 0..=20"
215    )]
216    d.push(art.picture_type.get() as u8);
217    d.extend_from_slice(art.description.as_bytes());
218    d.push(0x00);
219    d
220}
221
/// Body of a POPM frame: `<owner>\0<rating:u8>[<counter: 4-byte big-endian>]`.
/// The owner is always written empty (by design, the original tagger identity
/// is dropped). The 4-byte counter is appended only when `playcount` is
/// non-zero, and play counts above `u32::MAX` saturate to `u32::MAX`.
fn popm_frame_data(rating: u8, playcount: u64) -> Vec<u8> {
    // Lone NUL terminates the empty owner string; the rating byte follows.
    let mut body = vec![0x00, rating];
    if playcount != 0 {
        let counter = u32::try_from(playcount).unwrap_or(u32::MAX);
        body.extend_from_slice(&counter.to_be_bytes());
    }
    body
}
237
/// Body of a UFID frame: the owner string, a NUL terminator, then the raw
/// identifier bytes (`<owner>\0<identifier bytes>`).
fn ufid_frame_data(owner: &str, identifier: &[u8]) -> Vec<u8> {
    [owner.as_bytes(), &[0x00u8][..], identifier].concat()
}
246
/// Whether `key` is one of the canonical text keys that get rebuilt as
/// POPM/UFID frames. Such keys must stay out of the generic text/TXXX emission
/// so the value is never stored twice.
fn is_promoted_key(key: &str) -> bool {
    ["rating", "playcount", "musicbrainz_trackid"].contains(&key)
}
252
253/// Build the ID3v2.4 tag region for `tags`/`arts`: an inline 10-byte header
254/// followed by text/`TXXX` frames and `APIC` frames whose image bytes are
255/// streamed as `ArtImage` segments. Returns the segments (no backing audio) and
256/// the total tag length (`10 + frames_len`). Shared by MP3 synthesis and the WAV
257/// `id3 ` chunk.
258pub fn build_id3v2_segments(
259    tags: &[TagInput],
260    binary_tags: &[BinaryTagInput],
261    arts: &[ArtInput],
262) -> Result<(Vec<Segment>, u64)> {
263    // Pull the promoted scalar values out of `tags`: first `rating` /
264    // `musicbrainz_trackid` wins, `playcount` takes the last parseable value. A
265    // single POPM/UFID is the norm, so this only diverges from "first wins" for
266    // the rare multi-frame tag.
267    let mut popm_rating: Option<u8> = None;
268    let mut popm_playcount: u64 = 0;
269    let mut mbid: Option<String> = None;
270    for t in tags {
271        match t.key.as_str() {
272            "rating" if popm_rating.is_none() => popm_rating = t.value.parse().ok(),
273            "playcount" => popm_playcount = t.value.parse().unwrap_or(popm_playcount),
274            "musicbrainz_trackid" if mbid.is_none() => mbid = Some(t.value.clone()),
275            _ => {}
276        }
277    }
278
279    // Group consecutive same-key values (the DB returns tags ordered by key),
280    // skipping promoted keys so they never enter the generic text/TXXX path
281    // (no double-store).
282    let mut groups: Vec<(String, Vec<String>)> = Vec::new();
283    for t in tags {
284        if is_promoted_key(&t.key) {
285            continue;
286        }
287        match groups.last_mut() {
288            Some(g) if g.0 == t.key => g.1.push(t.value.clone()),
289            _ => groups.push((t.key.clone(), vec![t.value.clone()])),
290        }
291    }
292
293    let mut segments: Vec<Segment> = Vec::new();
294    let mut buf: Vec<u8> = Vec::new();
295    let mut frames_len: u64 = 0;
296
297    for (key, values) in &groups {
298        match crate::tagmap::key_to_id3(key) {
299            Some(crate::tagmap::Id3Slot::Text(id)) => {
300                let data = text_frame_data(values);
301                push_frame_header(&mut buf, id, data.len())?;
302                buf.extend_from_slice(&data);
303                frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
304            }
305            Some(crate::tagmap::Id3Slot::Txxx(desc)) => {
306                for value in values {
307                    let data = txxx_frame_data(desc, value);
308                    push_frame_header(&mut buf, b"TXXX", data.len())?;
309                    buf.extend_from_slice(&data);
310                    frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
311                }
312            }
313            Some(crate::tagmap::Id3Slot::Comment) => {
314                for value in values {
315                    let data = comm_like_frame_data(value);
316                    push_frame_header(&mut buf, b"COMM", data.len())?;
317                    buf.extend_from_slice(&data);
318                    frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
319                }
320            }
321            Some(crate::tagmap::Id3Slot::Lyrics) => {
322                for value in values {
323                    let data = comm_like_frame_data(value);
324                    push_frame_header(&mut buf, b"USLT", data.len())?;
325                    buf.extend_from_slice(&data);
326                    frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
327                }
328            }
329            None if is_id3_text_frame_id(key) => {
330                // safe: is_id3_text_frame_id guarantees key is exactly 4 bytes
331                let id: [u8; 4] = key.as_bytes().try_into().unwrap();
332                let data = text_frame_data(values);
333                push_frame_header(&mut buf, &id, data.len())?;
334                buf.extend_from_slice(&data);
335                frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
336            }
337            None => {
338                for value in values {
339                    let data = txxx_frame_data(key, value);
340                    push_frame_header(&mut buf, b"TXXX", data.len())?;
341                    buf.extend_from_slice(&data);
342                    frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
343                }
344            }
345        }
346    }
347
348    // Rebuilt promoted frames (POPM from rating/playcount, UFID from MBID).
349    if let Some(rating) = popm_rating {
350        let data = popm_frame_data(rating, popm_playcount);
351        push_frame_header(&mut buf, b"POPM", data.len())?;
352        buf.extend_from_slice(&data);
353        frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
354    }
355    if let Some(id) = &mbid {
356        let data = ufid_frame_data(MUSICBRAINZ_UFID_OWNER, id.as_bytes());
357        push_frame_header(&mut buf, b"UFID", data.len())?;
358        buf.extend_from_slice(&data);
359        frames_len = size::checked_add(frames_len, 10 + data.len() as u64)?;
360    }
361
362    // Opaque binary frames: header (inline) + streamed body (BinaryTag segment).
363    for bt in binary_tags {
364        // Defensive: ID3 opaque keys are 4-byte frame ids.
365        let Ok(id): std::result::Result<[u8; 4], _> = bt.key.as_bytes().try_into() else {
366            continue;
367        };
368        push_frame_header(&mut buf, &id, usize_from(bt.len.get()))?;
369        segments.push(Segment::Inline(std::mem::take(&mut buf)));
370        segments.push(Segment::BinaryTag {
371            payload_id: bt.payload_id,
372            len: bt.len,
373        });
374        frames_len = size::checked_add(frames_len, size::checked_add(10, bt.len.get())?)?;
375    }
376
377    for art in arts {
378        let framing = apic_framing(art);
379        let data_len = size::checked_add(framing.len() as u64, art.data_len.get())?;
380        push_frame_header(&mut buf, b"APIC", usize_from(data_len))?;
381        buf.extend_from_slice(&framing);
382        segments.push(Segment::Inline(std::mem::take(&mut buf)));
383        segments.push(Segment::ArtImage {
384            art_id: art.art_id,
385            len: art.data_len,
386        });
387        frames_len = size::checked_add(frames_len, size::checked_add(10, data_len)?)?;
388    }
389
390    if !buf.is_empty() {
391        segments.push(Segment::Inline(std::mem::take(&mut buf)));
392    }
393
394    // Prepend the 10-byte ID3v2.4 header now that the total frame length is known.
395    let mut header = Vec::with_capacity(10);
396    header.extend_from_slice(b"ID3");
397    header.extend_from_slice(&[0x04, 0x00]); // version 2.4.0
398    header.push(0x00); // flags: no unsync / extended header / footer
399
400    // The total tag size is a 28-bit syncsafe field. Ingestion caps each art well
401    // under this, but guard at the format boundary so an oversized tag (e.g. many
402    // large pictures summing past the limit) is a hard error, not a truncated file.
403    let frames_len_ss = u32::try_from(frames_len)
404        .ok()
405        .filter(|&v| v <= SYNCHSAFE_MAX)
406        .ok_or(FormatError::TooLarge)?;
407    header.extend_from_slice(&syncsafe(frames_len_ss));
408    segments.insert(0, Segment::Inline(header));
409
410    Ok((segments, size::checked_add(10, frames_len)?))
411}
412
413/// Build the synthesized region for an MP3: a fresh ID3v2.4 tag (text frames +
414/// APIC frames, with image bytes streamed as `ArtImage` segments) followed by the
415/// backing audio.
416pub fn synthesize_layout(
417    audio_offset: u64,
418    audio_length: u64,
419    tags: &[TagInput],
420    binary_tags: &[BinaryTagInput],
421    arts: &[ArtInput],
422) -> Result<RegionLayout> {
423    let (mut segments, _tag_len) = build_id3v2_segments(tags, binary_tags, arts)?;
424    segments.push(Segment::BackingAudio {
425        offset: audio_offset,
426        len: audio_length,
427    });
428    Ok(RegionLayout::validated(segments)?)
429}
430
/// Returns true only when `data` begins with an ID3v2 tag whose frames have all
/// been bounds-checked against the declared tag body, so that handing `data` to
/// the `id3` crate cannot drive an unbounded allocation (the crate eagerly
/// `with_capacity`s a frame's declared size — and ID3v2.3 frame sizes are plain
/// 32-bit, up to 4 GiB). When false, callers skip ID3 parsing (yielding no tags
/// for that file) rather than risk an OOM.
///
/// Conservative by design. All of the following return false:
/// - no ID3v2 header at offset 0 (including ID3v1-only or untagged data), or a
///   header that `id3v2_header_len` rejects as malformed;
/// - the extended-header or unsynchronisation header flag is set;
/// - the declared tag body extends past the end of `data`;
/// - a `CHAP` or `CTOC` frame is present (v2.3/v2.4);
/// - any v2.3/v2.4 frame has non-zero frame flags, or a v2.4 frame size has a
///   byte with its high bit set;
/// - any frame header or payload reaches past the declared tag body.
///
/// Those files lose scan-time tag extraction, but cannot OOM the scanner.
fn id3v2_alloc_safe(data: &[u8]) -> bool {
    // id3::Tag::read_from2 scans forward to locate a tag, so handing it any
    // buffer that is not a validated ID3v2 tag at offset 0 risks the unbounded
    // allocation we are guarding against. Only parse when an ID3v2 header is at
    // offset 0 (and its frames validate, below). Trade-off: scan-time tag
    // extraction for ID3v1-only files (no leading ID3v2 header) is skipped;
    // ID3v1 is legacy/fixed-size and tags can be populated via the DB
    // (beets/picard) regardless.
    let Ok(Some(tag_end)) = id3v2_header_len(data) else {
        // Not an ID3v2 tag at offset 0, or a malformed header: skip parsing.
        return false;
    };
    // `id3v2_header_len` only returns `Some` when `data` holds the full 10-byte
    // header, so indexing bytes 3 and 5 below cannot go out of bounds.
    let flags = data[5];
    // Extended header (0x40) and unsynchronisation (0x80) complicate frame
    // bounds; skip rather than risk mis-validating.
    if flags & 0xC0 != 0 {
        return false;
    }
    // The declared tag body must be fully present in `data`.
    if tag_end > data.len() {
        return false;
    }
    let major = data[3];
    // v2.2 frame headers are 6 bytes (3-byte id + 3-byte size); v2.3/v2.4 use
    // 10 bytes (4-byte id + 4-byte size + 2 flag bytes).
    let header_len = if major == 2 { 6 } else { 10 };
    // Walk frames starting right after the tag header. The loop condition is
    // bounded by the buffer (not by tag_end) so that a frame header which
    // straddles tag_end is still read and then rejected by the bounds check
    // below instead of being silently ignored. The walk itself never continues
    // past tag_end: it stops at padding or once `pos` reaches tag_end.
    let scan_end = data.len();
    let mut pos = 10usize;
    while pos + header_len <= scan_end {
        // A zero first id byte marks the start of the padding region.
        if data[pos] == 0 {
            break;
        }
        // CHAP and CTOC frames contain embedded sub-frames; the id3 crate
        // allocates based on those sub-frame sizes, creating a recursive OOM
        // vector.  Reject tags containing either frame type (v2.3/v2.4 only;
        // v2.2 uses 3-byte frame ids and never defines chapter frames).
        if major != 2 && (&data[pos..pos + 4] == b"CHAP" || &data[pos..pos + 4] == b"CTOC") {
            return false;
        }
        let size = if major == 2 {
            // ID3v2.2: 24-bit big-endian frame size at bytes 3..6 of the header.
            u32::from_be_bytes([0, data[pos + 3], data[pos + 4], data[pos + 5]]) as usize
        } else if major == 3 {
            // ID3v2.3: plain 32-bit big-endian frame size.
            // Frame flags at pos+8..pos+10: reject any non-zero flags.  The id3
            // crate handles COMPRESSION (0x0080) by subtracting 4 from the size
            // (panicking if size < 4), and ENCRYPTION/GROUPING_IDENTITY by
            // returning errors; rejecting all non-zero frame flags avoids those
            // paths entirely.
            if data[pos + 8] != 0 || data[pos + 9] != 0 {
                return false;
            }
            u32::from_be_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
                as usize
        } else {
            // ID3v2.4: synchsafe frame size.  Reject if any byte has its high
            // bit set (malformed synchsafe), for the same reason as the body.
            // Also reject non-zero frame flags for the same reasons as v2.3.
            // (`|` binds tighter than `>=` in Rust, so this tests the OR of all
            // four size bytes.)
            if data[pos + 4] | data[pos + 5] | data[pos + 6] | data[pos + 7] >= 0x80 {
                return false;
            }
            if data[pos + 8] != 0 || data[pos + 9] != 0 {
                return false;
            }
            synchsafe_decode(&data[pos + 4..pos + 8]) as usize
        };
        let data_start = pos + header_len;
        // Reject if the frame header itself extends past the declared tag body,
        // or if the frame payload claims more bytes than the remaining body.
        // The id3 crate would otherwise attempt to subtract or allocate with
        // an invalid size, causing a panic or OOM. Checking `data_start` first
        // also keeps the subtraction from underflowing.
        if data_start > tag_end || size > tag_end - data_start {
            return false;
        }
        pos = data_start + size;
        // Stop once we have walked past the declared tag body: any subsequent
        // bytes are audio or trailing tags, not ID3v2 frames.
        if pos >= tag_end {
            break;
        }
    }
    true
}
525
526/// Extract all APIC pictures from an MP3's ID3v2 tag as embedded pictures, for
527/// scan-time art ingestion. Returns empty if there is no tag or no pictures.
528pub fn read_pictures(data: &[u8]) -> Vec<EmbeddedPicture> {
529    if !id3v2_alloc_safe(data) {
530        return Vec::new();
531    }
532    let Ok(tag) = id3::Tag::read_from2(std::io::Cursor::new(data)) else {
533        return Vec::new();
534    };
535    tag.pictures()
536        .map(|p| EmbeddedPicture {
537            mime: p.mime_type.clone(),
538            // The id3 crate's PictureType has an `Undefined(u8)` variant that can
539            // exceed 20; clamp out-of-range to 0, matching the FLAC parser and
540            // scan's prior policy.
541            picture_type: PictureType::new(u8::from(p.picture_type).into())
542                .unwrap_or(PictureType::ZERO),
543            description: p.description.clone(),
544            width: 0,
545            height: 0,
546            data: p.data.clone(),
547        })
548        .collect()
549}
550
551/// Read an existing ID3v2 tag and fold it into canonical `(key, value)` pairs.
552/// Text frames map via the vocabulary (NUL-separated multi-value yields one pair
553/// per value); unmapped text frames pass through keyed by their frame id; `TXXX`
554/// frames key on their description (folded to canonical when known); `COMM`/`USLT`
555/// yield `comment`/`lyrics` (text only). Other/binary frames are skipped.
556/// Multiple `COMM` or `USLT` frames (e.g. one per language) each emit a separate
557/// pair; their language and description fields are not preserved.
558pub fn read_tags(data: &[u8]) -> Vec<(String, String)> {
559    if !id3v2_alloc_safe(data) {
560        return Vec::new();
561    }
562    let Ok(tag) = id3::Tag::read_from2(std::io::Cursor::new(data)) else {
563        return Vec::new();
564    };
565    let mut out = Vec::new();
566    for frame in tag.frames() {
567        let content = frame.content();
568        if let Some(et) = content.extended_text() {
569            let key = crate::tagmap::id3_txxx_to_key(&et.description)
570                .map_or_else(|| et.description.clone(), str::to_string);
571            out.push((key, et.value.clone()));
572        } else if let Some(c) = content.comment() {
573            out.push(("comment".to_string(), c.text.clone()));
574        } else if let Some(l) = content.lyrics() {
575            out.push(("lyrics".to_string(), l.text.clone()));
576        } else if let Some(text) = content.text() {
577            let id = frame.id();
578            let key =
579                crate::tagmap::id3_text_to_key(id).map_or_else(|| id.to_string(), str::to_string);
580            for value in text.split('\0').filter(|v| !v.is_empty()) {
581                out.push((key.clone(), value.to_string()));
582            }
583        }
584    }
585    out
586}
587
/// Owner string of the `UFID` frame that carries the MusicBrainz track id. It
/// is written as the owner when that frame is rebuilt, and a `UFID` frame read
/// from a file is promoted to `musicbrainz_trackid` only when its owner equals
/// this value byte-for-byte.
pub(crate) const MUSICBRAINZ_UFID_OWNER: &str = "http://musicbrainz.org";
589
590/// Extract an ID3v2.3/2.4 tag's binary frames. Returns `(opaque, promoted)`:
591/// - `opaque`: frames preserved **byte-exact** — `(frame-id, raw post-header body)`.
592///   `PRIV`/`GEOB`/`SYLT`/`MCDI`/unknown frames and any non-MusicBrainz `UFID`.
593/// - `promoted`: `(key, value)` text pairs — `POPM` → `rating` (raw 0–255) + `playcount`
594///   (counter, omitted when 0); MusicBrainz `UFID` → `musicbrainz_trackid`. Promoted
595///   frames are NOT in `opaque`.
596///
597/// Text (`T***`), `COMM`, `USLT`, `APIC` are handled by `read_tags`/`read_pictures`
598/// and skipped. Gated by `id3v2_alloc_safe`, so the tag is well-formed, has no
599/// unsynchronisation/extended header/frame flags, and bodies are sliced verbatim.
600/// v2.2 (3-char ids) is not processed (rare; text/art still parse via the crate).
601pub fn read_binary_tags(data: &[u8]) -> (Vec<EmbeddedBinaryTag>, Vec<(String, String)>) {
602    let mut opaque = Vec::new();
603    let mut promoted = Vec::new();
604    if !id3v2_alloc_safe(data) || data[3] < 3 {
605        return (opaque, promoted);
606    }
607    let tag_end = 10 + synchsafe_decode(&data[6..10]) as usize;
608    let mut pos = 10usize;
609    while pos + 10 <= tag_end {
610        if data[pos] == 0 {
611            break;
612        }
613        let id = &data[pos..pos + 4];
614        let size = if data[3] == 3 {
615            u32::from_be_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
616                as usize
617        } else {
618            synchsafe_decode(&data[pos + 4..pos + 8]) as usize
619        };
620        let body_start = pos + 10;
621        if body_start + size > tag_end {
622            break;
623        }
624        classify_binary_frame(
625            id,
626            &data[body_start..body_start + size],
627            &mut opaque,
628            &mut promoted,
629        );
630        pos = body_start + size;
631    }
632    (opaque, promoted)
633}
634
635/// Classify one ID3v2 frame body into opaque-passthrough or promoted-text.
636fn classify_binary_frame(
637    id: &[u8],
638    body: &[u8],
639    opaque: &mut Vec<EmbeddedBinaryTag>,
640    promoted: &mut Vec<(String, String)>,
641) {
642    // Handled by read_tags/read_pictures: text frames (T***), COMM, USLT, APIC.
643    if id[0] == b'T' || id == b"COMM" || id == b"USLT" || id == b"APIC" {
644        return;
645    }
646    match id {
647        b"POPM" => {
648            // <owner>\0<rating:u8>[<counter: big-endian>]
649            if let Some(nul) = body.iter().position(|&b| b == 0)
650                && let Some((&rating, counter)) = body[nul + 1..].split_first()
651            {
652                promoted.push(("rating".to_string(), rating.to_string()));
653                let c = counter
654                    .iter()
655                    .take(8)
656                    .fold(0u64, |a, &b| (a << 8) | u64::from(b));
657                if c > 0 {
658                    promoted.push(("playcount".to_string(), c.to_string()));
659                }
660            }
661        }
662        b"UFID" => {
663            // <owner>\0<identifier>. MusicBrainz owner promotes; others opaque.
664            match body.iter().position(|&b| b == 0) {
665                Some(nul) if &body[..nul] == MUSICBRAINZ_UFID_OWNER.as_bytes() => {
666                    promoted.push((
667                        "musicbrainz_trackid".to_string(),
668                        String::from_utf8_lossy(&body[nul + 1..]).into_owned(),
669                    ));
670                }
671                _ => opaque.push(EmbeddedBinaryTag {
672                    key: "UFID".to_string(),
673                    payload: body.to_vec(),
674                }),
675            }
676        }
677        _ => {
678            // Opaque verbatim: PRIV, GEOB, SYLT, MCDI, W***, unknown, … (4-byte ids).
679            if id.iter().all(u8::is_ascii_graphic) {
680                opaque.push(EmbeddedBinaryTag {
681                    key: String::from_utf8_lossy(id).into_owned(),
682                    payload: body.to_vec(),
683                });
684            }
685        }
686    }
687}
688
689#[cfg(test)]
690mod tests {
691    use super::*;
692    use crate::input::{BlobLen, PictureType};
693
694    /// Build a minimal ID3v2.3 tag with a single frame whose declared size
695    /// overflows the tag bounds, and assert the guard rejects it.
696    #[test]
697    fn id3v2_guard_rejects_oversized_v23_frame() {
698        // Tag header: b"ID3" major=3 rev=0 flags=0
699        // Synchsafe body size encoding 10 (= one 10-byte frame header, no payload):
700        //   syncsafe(10) = [0, 0, 0, 0x0A]
701        // Frame: id=TIT2 (4 bytes), size=0xFFFF_FFFF (4 bytes, plain BE), flags=0x00 0x00
702        let mut bytes: Vec<u8> = Vec::new();
703        bytes.extend_from_slice(b"ID3");
704        bytes.push(0x03); // major version 2.3
705        bytes.push(0x00); // revision
706        bytes.push(0x00); // flags: no extended header, no unsync
707        // synchsafe body = 10 (covers exactly one 10-byte frame header)
708        bytes.extend_from_slice(&[0x00, 0x00, 0x00, 0x0A]);
709        // Frame header: id "TIT2", size 0xFFFF_FFFF (big-endian, plain 32-bit)
710        bytes.extend_from_slice(b"TIT2");
711        bytes.extend_from_slice(&[0xFF, 0xFF, 0xFF, 0xFF]);
712        bytes.extend_from_slice(&[0x00, 0x00]); // frame flags
713
714        assert!(
715            !id3v2_alloc_safe(&bytes),
716            "guard should reject frame claiming more bytes than the tag holds"
717        );
718        // Must return quickly without OOM and produce no tags.
719        assert!(
720            read_tags(&bytes).is_empty(),
721            "read_tags must return empty for unsafe tag"
722        );
723    }
724
    /// A buffer that does not start with "ID3" must be rejected by the guard.
    /// id3::Tag::read_from2 scans forward to locate a tag, so any non-ID3-prefixed
    /// buffer is unsafe regardless of what bytes appear later. Covers both a
    /// trivial non-ID3 buffer and a fuzz-derived RIFF buffer with a nested tag.
    #[test]
    fn id3v2_guard_rejects_non_id3_prefixed() {
        // Plain non-ID3 bytes.
        assert!(
            !id3v2_alloc_safe(b"RIFF....just not an id3 tag...."),
            "guard must reject buffer not starting with ID3"
        );
        assert!(
            read_tags(b"RIFF....just not an id3 tag....").is_empty(),
            "read_tags must return empty for non-ID3-prefixed buffer"
        );

        // The WAV crash vector: "RIFF..." body whose bytes do not start with "ID3"
        // but contain a nested ID3v2.3 tag with a TDA frame declaring ~4 GiB.
        // Extracted from fuzz/artifacts/wav/oom-4a21767820d5f05328f01d975fb6d3314f3fb902:
        // the ID3 chunk body starts at offset 0x18 and begins with "RIFF".
        // The bytes are a verbatim fixture; do not reformat or "fix" them.
        const RIFF_BODY: &[u8] = &[
            0x52, 0x49, 0x46, 0x46, 0x32, 0x00, 0x00, 0x00, // "RIFF2..."
            0x57, 0x41, 0x56, 0x45, // "WAVE"
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x49, 0x44, 0x33, 0x20, // nested "ID3 " fourcc
            0x15, 0x00, 0x00, 0x00, // chunk size = 21
            0x49, 0x44, 0x33, // "ID3" — nested tag starts here
            0x03, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x00, 0x54, 0x44, 0x41, 0x03, 0xf6, 0x00, 0x00,
            0x00, // TDA frame size = 0xF600_0000 (~4 GiB)
        ];
        // The guard only accepts a header at offset 0, so the nested tag must
        // never reach the parser.
        assert!(
            !id3v2_alloc_safe(RIFF_BODY),
            "guard must reject RIFF-prefixed buffer (WAV crash vector)"
        );
        assert!(
            read_tags(RIFF_BODY).is_empty(),
            "read_tags must return empty for RIFF-prefixed buffer"
        );
    }
763
764    /// Write a real ID3v2.4 tag via the id3 crate and confirm the guard allows it
765    /// and that read_tags extracts the expected values.
766    #[test]
767    fn id3v2_guard_allows_valid_tag() {
768        use id3::{Tag, TagLike, Version};
769
770        let mut tag = Tag::new();
771        tag.set_text("TIT2", "Hello");
772        tag.set_text("TPE1", "Artist");
773        let mut buf = Vec::new();
774        tag.write_to(&mut buf, Version::Id3v24).unwrap();
775
776        assert!(
777            id3v2_alloc_safe(&buf),
778            "guard should allow a well-formed tag written by the id3 crate"
779        );
780        let tags = read_tags(&buf);
781        assert!(
782            tags.contains(&("title".to_string(), "Hello".to_string())),
783            "missing title in {tags:?}"
784        );
785        assert!(
786            tags.contains(&("artist".to_string(), "Artist".to_string())),
787            "missing artist in {tags:?}"
788        );
789    }
790
791    /// Replay fuzz-discovered crash artifacts: tags that would OOM the id3 crate.
792    /// The guard must reject all of them and return empty without allocating.
793    #[test]
794    fn read_tags_handles_oom_crash_input_safely() {
795        // Artifact 1 (oom-a9b766b...): 30-byte ID3v2.3 tag with flags=0xf0
796        // (extended header + unsync bits set).  Guard rejects via flags & 0xC0.
797        // xxd fuzz/artifacts/mp3/oom-a9b766b841c2a964e72b01f31c174f25bf11b2d2
798        const CRASH1: &[u8] = &[
799            0x49, 0x44, 0x33, // "ID3"
800            0x03, 0xf0, // major=3, flags=0xf0 (extended header + unsync)
801            0x00, 0x00, 0xf9, 0x2d, // synchsafe body size
802            0x49, 0x50, 0x4c, 0x53, // frame id "IPLS"
803            0x00, 0xf9, 0x3d, 0x02, // frame size (big-endian)
804            0x00, 0x2d, 0x01, 0x00, // frame flags + data
805            0x00, 0x03, 0x00, 0x49, 0x07, 0x10, 0xff, 0x07, 0xfe,
806        ];
807        // Artifact 2 (oom-54f1f5e1...): 26-byte ID3v2.3 tag with a malformed
808        // synchsafe body field (data[9]=0x80, high bit set).  The id3 crate
809        // treated the raw value as 128, walked the oversized IPLS frame, and
810        // OOMed.  Guard rejects via the high-bit check on body bytes.
811        // xxd fuzz/artifacts/mp3/oom-54f1f5e197c4aa191f4aac77bc263939a4e4ee83
812        const CRASH2: &[u8] = &[
813            0x49, 0x44, 0x33, // "ID3"
814            0x03, 0x00, // major=3, flags=0 (no extended header / unsync)
815            0x00, 0x00, 0x00, 0x80, // body bytes: data[9]=0x80 — malformed synchsafe
816            0x0a, 0x27, 0x2f, 0x00, // frame id (partial)
817            0xff, 0xee, 0x01, 0x00, // frame size declares ~4 GB
818            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x2f,
819        ];
820        for (i, crash) in [CRASH1, CRASH2].iter().enumerate() {
821            assert!(
822                read_tags(crash).is_empty(),
823                "read_tags must be safe on crash artifact {i}"
824            );
825        }
826    }
827
828    #[test]
829    fn read_tags_captures_txxx_comm_uslt_and_unmapped_text() {
830        use id3::frame::{Comment, ExtendedText, Lyrics};
831        use id3::{Tag, TagLike, Version}; // TagLike brings set_text/add_frame into scope
832
833        let mut tag = Tag::new();
834        tag.set_text("TIT2", "Song");
835        tag.set_text("TBPM", "120"); // standard frame, not in vocabulary
836        tag.add_frame(ExtendedText {
837            description: "MOOD".into(),
838            value: "happy".into(),
839        });
840        tag.add_frame(ExtendedText {
841            description: "REPLAYGAIN_TRACK_GAIN".into(),
842            value: "-6.5 dB".into(),
843        });
844        tag.add_frame(Comment {
845            lang: "eng".into(),
846            description: String::new(),
847            text: "nice".into(),
848        });
849        tag.add_frame(Lyrics {
850            lang: "eng".into(),
851            description: String::new(),
852            text: "la la".into(),
853        });
854
855        let mut buf = Vec::new();
856        tag.write_to(&mut buf, Version::Id3v24).unwrap();
857
858        let tags = read_tags(&buf);
859        assert!(tags.contains(&("title".to_string(), "Song".to_string())));
860        assert!(tags.contains(&("TBPM".to_string(), "120".to_string())));
861        assert!(tags.contains(&("MOOD".to_string(), "happy".to_string())));
862        assert!(tags.contains(&("replaygain_track_gain".to_string(), "-6.5 dB".to_string())));
863        assert!(tags.contains(&("comment".to_string(), "nice".to_string())));
864        assert!(tags.contains(&("lyrics".to_string(), "la la".to_string())));
865    }
866
867    #[test]
868    fn synthesize_round_trips_arbitrary_id3_tags() {
869        let tags = vec![
870            TagInput::new("title", "Song"),
871            TagInput::new("TBPM", "120"),     // unmapped standard frame
872            TagInput::new("MyRating", "5"),   // user-defined -> TXXX
873            TagInput::new("comment", "nice"), // -> COMM
874            TagInput::new("lyrics", "la la"), // -> USLT
875            TagInput::new("replaygain_track_gain", "-3.21 dB"), // -> TXXX (fixed desc)
876        ];
877        let (segments, _len) = build_id3v2_segments(&tags, &[], &[]).unwrap();
878        let mut buf = Vec::new();
879        for seg in &segments {
880            if let Segment::Inline(bytes) = seg {
881                buf.extend_from_slice(bytes);
882            }
883        }
884        let read = read_tags(&buf);
885        for expected in [
886            ("title", "Song"),
887            ("TBPM", "120"),
888            ("MyRating", "5"),
889            ("comment", "nice"),
890            ("lyrics", "la la"),
891            ("replaygain_track_gain", "-3.21 dB"),
892        ] {
893            assert!(
894                read.contains(&(expected.0.to_string(), expected.1.to_string())),
895                "missing {expected:?} in {read:?}"
896            );
897        }
898    }
899
900    #[test]
901    fn synchsafe_decode_assembles_7bit_groups() {
902        // (1<<21)|(2<<14)|(3<<7)|4
903        assert_eq!(synchsafe_decode(&[0x01, 0x02, 0x03, 0x04]), 0x0020_8184);
904        // high bit of each byte masked (& 0x7F): 0xFF -> 0x7F per group.
905        assert_eq!(synchsafe_decode(&[0xFF, 0xFF, 0xFF, 0xFF]), 0x0FFF_FFFF);
906        // only the top group set -> pins the `<<21` (kills `<<21 -> >>21`).
907        assert_eq!(synchsafe_decode(&[0x7F, 0x00, 0x00, 0x00]), 0x0FE0_0000);
908        // only the second group set -> pins the `<<14` (kills `<<14 -> >>14`).
909        assert_eq!(synchsafe_decode(&[0x00, 0x7F, 0x00, 0x00]), 0x001F_C000);
910    }
911
912    #[test]
913    fn syncsafe_encodes_and_round_trips() {
914        // pins the `>>21` and `>>14` group extraction.
915        assert_eq!(syncsafe(0x0FE0_0000), [0x7F, 0x00, 0x00, 0x00]);
916        assert_eq!(syncsafe(0x001F_C000), [0x00, 0x7F, 0x00, 0x00]);
917        // round-trip over the full 28-bit range pins every group boundary.
918        for n in [0u32, 1, 127, 128, 0x0123_4567, 0x0FFF_FFFF] {
919            assert_eq!(synchsafe_decode(&syncsafe(n)), n);
920        }
921    }
922
923    #[test]
924    fn locate_audio_no_id3_starts_at_zero() {
925        // >=10 bytes, not "ID3": original skips the ID3 block (audio at 0). The
926        // `&& -> ||` mutant enters the block, decodes garbage, and returns Err — so
927        // this unwrap kills it. Frame sync 0xFF 0xFB at offset 0.
928        let data = [0xFF, 0xFB, 0x90, 0x00, 0, 0, 0, 0, 0, 0];
929        let b = locate_audio(&data).unwrap();
930        assert_eq!(b.audio_offset, 0);
931        assert_eq!(b.audio_length, 10);
932    }
933
934    #[test]
935    fn locate_audio_skips_id3v2_then_finds_sync() {
936        // "ID3" v2.4, flags=0, synchsafe body=4 -> tag_len=14. Sync at offset 14.
937        let mut data = Vec::new();
938        data.extend_from_slice(b"ID3");
939        data.extend_from_slice(&[0x04, 0x00, 0x00]); // major, rev, flags
940        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x04]); // synchsafe body=4
941        data.extend_from_slice(&[0xAA, 0xBB, 0xCC, 0xDD]); // 4 body bytes
942        data.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00]); // audio sync at 14
943        let b = locate_audio(&data).unwrap();
944        assert_eq!(b.audio_offset, 14);
945        assert_eq!(b.audio_length, 4);
946    }
947
948    #[test]
949    fn locate_audio_honors_footer_flag() {
950        // footer flag (0x10) adds 10 to tag_len. body=0 -> tag_len = 10+0+10 = 20.
951        // Sync at offset 20. The `+= -> -=`/`*=` mutant computes the wrong tag_len
952        // and the sync check lands on the wrong byte -> Err (kills the `+=`).
953        let mut data = Vec::new();
954        data.extend_from_slice(b"ID3");
955        data.extend_from_slice(&[0x04, 0x00, 0x10]); // flags: footer present
956        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]); // synchsafe body=0
957        data.extend_from_slice(&[0u8; 10]); // 10-byte footer region
958        data.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00]); // sync at offset 20
959        let b = locate_audio(&data).unwrap();
960        assert_eq!(b.audio_offset, 20);
961    }
962
963    #[test]
964    fn locate_audio_requires_frame_sync() {
965        // data[0]=0xFF but data[1] lacks the 0xE0 sync bits: original rejects
966        // (NotMp3). The `|| -> &&` mutant accepts (only rejects if ALL conditions
967        // hold). The `+ -> *` on data[audio_offset+1] would read data[0] instead of
968        // data[1]; with distinct bytes the sync decision flips.
969        let data = [0xFF, 0x00, 0x00, 0x00, 0, 0, 0, 0, 0, 0];
970        assert_eq!(locate_audio(&data), Err(FormatError::NotMp3));
971        // 1-byte buffer: original NotMp3 (audio_offset+1 >= len). The `+ -> *`
972        // mutant computes 0*1=0 >= 1 = false, falls through, and panics on data[1].
973        assert_eq!(locate_audio(&[0xFF]), Err(FormatError::NotMp3));
974    }
975
976    #[test]
977    fn push_frame_header_size_boundary_is_inclusive() {
978        // ID3v2.4 frame size is a 28-bit syncsafe field; the guard rejects
979        // data_len > 0x0FFF_FFFF. 0x0FFF_FFFF is the inclusive max (Ok); +1 errors.
980        let mut out = Vec::new();
981        assert!(push_frame_header(&mut out, b"TIT2", 0x0FFF_FFFF).is_ok());
982        let mut over = Vec::new();
983        assert_eq!(
984            push_frame_header(&mut over, b"TIT2", 0x1000_0000),
985            Err(FormatError::TooLarge)
986        );
987    }
988
989    #[test]
990    fn is_id3_text_frame_id_classifies_text_frames() {
991        assert!(is_id3_text_frame_id("TPE1")); // T + 3 upper/digit, not TXXX
992        assert!(is_id3_text_frame_id("TIT2"));
993        assert!(!is_id3_text_frame_id("TXXX")); // excluded (kills `!= -> ==`)
994        assert!(!is_id3_text_frame_id("COMM")); // not T-prefixed
995        assert!(!is_id3_text_frame_id("TPE")); // wrong length
996        assert!(!is_id3_text_frame_id("Txx1")); // lowercase -> false
997    }
998
999    #[test]
1000    fn build_id3v2_segments_emits_standard_text_frame_as_itself() {
1001        // A 4-char T-frame key (TPE1) must round-trip as a TPE1 frame, not TXXX.
1002        // The `is_id3_text_frame_id` match-guard `-> false` mutant would route it to
1003        // the TXXX branch, so read_tags would surface it under a different key.
1004        let tags = vec![TagInput::new("TPE1", "Band")];
1005        let (segments, _len) = build_id3v2_segments(&tags, &[], &[]).unwrap();
1006        let mut buf = Vec::new();
1007        for seg in &segments {
1008            if let Segment::Inline(b) = seg {
1009                buf.extend_from_slice(b);
1010            }
1011        }
1012        // The literal frame id "TPE1" must appear in the emitted tag bytes.
1013        assert!(
1014            buf.windows(4).any(|w| w == b"TPE1"),
1015            "TPE1 frame not emitted: routed elsewhere"
1016        );
1017        // And it round-trips to the mapped key (artist), not a TXXX user field.
1018        let read = read_tags(&buf);
1019        assert!(
1020            read.contains(&("artist".to_string(), "Band".to_string())),
1021            "got {read:?}"
1022        );
1023    }
1024
1025    #[test]
1026    fn build_id3v2_segments_rejects_oversized_total_tag() {
1027        // The total-tag guard rejects frames_len > 0x0FFF_FFFF. An APIC art whose
1028        // data_len (a count, not allocated) pushes the total just over the limit
1029        // must error; one byte under must succeed.
1030        let mk = |data_len: u64| ArtInput {
1031            art_id: 1,
1032            mime: "image/png".to_string(),
1033            description: String::new(),
1034            picture_type: PictureType::new(3).unwrap(),
1035            width: 0,
1036            height: 0,
1037            data_len: BlobLen::new(data_len).unwrap(),
1038        };
1039        assert_eq!(
1040            build_id3v2_segments(&[], &[], &[mk(0x1000_0000)]).err(),
1041            Some(FormatError::TooLarge)
1042        );
1043        assert!(build_id3v2_segments(&[], &[], &[mk(16)]).is_ok());
1044        // Exact boundary: compute the APIC framing overhead, then place
1045        // frames_len exactly on 0x0FFF_FFFF (one byte under must succeed) and
1046        // 0x1_0000_0000 (must error). This pins the `> -> >=` mutation. The
1047        // baseline art uses data_len=1 (not 0) because zero-byte art is skipped.
1048        let (_, total_at_one) = build_id3v2_segments(&[], &[], &[mk(1)]).unwrap();
1049        let overhead = total_at_one - 10 - 1; // frames_len = overhead + data_len
1050        let boundary_data_len = 0x0FFF_FFFF - overhead;
1051        assert!(
1052            build_id3v2_segments(&[], &[], &[mk(boundary_data_len)]).is_ok(),
1053            "exact boundary (frames_len == 0x0FFF_FFFF) should be accepted"
1054        );
1055        assert_eq!(
1056            build_id3v2_segments(&[], &[], &[mk(boundary_data_len + 1)]).err(),
1057            Some(FormatError::TooLarge),
1058            "one byte past boundary must be rejected"
1059        );
1060    }
1061
1062    #[test]
1063    fn build_id3v2_segments_emits_art_segment_with_correct_id_and_len() {
1064        // Feed a single art entry and verify the emitted ArtImage segment carries
1065        // the correct art_id and data length.
1066        let mk = |art_id: i64, data_len: u64| ArtInput {
1067            art_id,
1068            mime: "image/png".to_string(),
1069            description: String::new(),
1070            picture_type: PictureType::new(3).unwrap(),
1071            width: 0,
1072            height: 0,
1073            data_len: BlobLen::new(data_len).unwrap(),
1074        };
1075        let (segments, _len) = build_id3v2_segments(&[], &[], &[mk(2, 16)]).unwrap();
1076        let art_segs: Vec<_> = segments
1077            .iter()
1078            .filter_map(|s| match s {
1079                Segment::ArtImage { art_id, len } => Some((*art_id, len.get())),
1080                _ => None,
1081            })
1082            .collect();
1083        assert_eq!(
1084            art_segs,
1085            vec![(2_i64, 16_u64)],
1086            "only the non-empty art should be emitted"
1087        );
1088    }
1089
1090    /// Independent synchsafe encoder for fixtures (does NOT call `syncsafe`, so a
1091    /// mutation there cannot mask a fixture).
1092    fn ss(n: u32) -> [u8; 4] {
1093        [
1094            ((n >> 21) & 0x7F) as u8,
1095            ((n >> 14) & 0x7F) as u8,
1096            ((n >> 7) & 0x7F) as u8,
1097            (n & 0x7F) as u8,
1098        ]
1099    }
1100
1101    /// Build an ID3v2 tag: "ID3", `major`, rev=0, `flags`, synchsafe `body` size,
1102    /// then the raw `frames` bytes.
1103    fn id3v2(major: u8, flags: u8, body: u32, frames: &[u8]) -> Vec<u8> {
1104        let mut v = Vec::new();
1105        v.extend_from_slice(b"ID3");
1106        v.push(major);
1107        v.push(0x00);
1108        v.push(flags);
1109        v.extend_from_slice(&ss(body));
1110        v.extend_from_slice(frames);
1111        v
1112    }
1113
1114    #[test]
1115    fn alloc_safe_accepts_minimal_valid_header() {
1116        // 10-byte v2.4 header, body=0, no frames -> safe. This is exactly the
1117        // len==10 boundary, so the `< -> <=` mutant (10<=10 -> reject) flips it.
1118        let tag = id3v2(0x04, 0x00, 0, &[]);
1119        assert_eq!(tag.len(), 10);
1120        assert!(id3v2_alloc_safe(&tag));
1121    }
1122
1123    #[test]
1124    fn alloc_safe_rejects_short_and_non_id3() {
1125        // "ID3" + 2 bytes (len 5, marker correct): original returns false (len<10).
1126        // `< -> ==` (5==10 false) and `|| -> &&` (true && false) both fall through
1127        // and panic reading data[5]. Asserting `!safe` kills them.
1128        assert!(!id3v2_alloc_safe(b"ID3xx"));
1129        // Right length, wrong marker -> false.
1130        assert!(!id3v2_alloc_safe(b"XXX\x04\x00\x00\x00\x00\x00\x00"));
1131    }
1132
1133    #[test]
1134    fn alloc_safe_rejects_bad_version_and_header_flags() {
1135        // major outside 2..=4 -> false (kills the `matches!(major, 2..=4)` mutations).
1136        assert!(!id3v2_alloc_safe(&id3v2(0x05, 0x00, 0, &[])));
1137        assert!(!id3v2_alloc_safe(&id3v2(0x01, 0x00, 0, &[])));
1138        // extended-header (0x40) or unsync (0x80) -> false (kills `& 0xC0` mutations).
1139        assert!(!id3v2_alloc_safe(&id3v2(0x04, 0x40, 0, &[])));
1140        assert!(!id3v2_alloc_safe(&id3v2(0x04, 0x80, 0, &[])));
1141    }
1142
1143    #[test]
1144    fn alloc_safe_rejects_high_bit_in_body_size() {
1145        // Two body-size bytes with the high bit set: OR = 0x80 (reject). The
1146        // `| -> ^` mutant gives 0x80^0x80 = 0 (accept); `| -> &` gives 0x80&0x80&0&0
1147        // = 0 (accept). Built by hand because `ss()` would clear the high bits.
1148        let tag = vec![b'I', b'D', b'3', 0x04, 0x00, 0x00, 0x80, 0x80, 0x00, 0x00];
1149        assert!(!id3v2_alloc_safe(&tag));
1150        // Single high-bit byte still rejected (pins the `>= 0x80` comparison).
1151        let tag1 = vec![b'I', b'D', b'3', 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80];
1152        assert!(!id3v2_alloc_safe(&tag1));
1153    }
1154
1155    #[test]
1156    fn alloc_safe_rejects_high_bit_in_v24_frame_size() {
1157        // v2.4 frame size is synchsafe; two size bytes with the high bit set must be
1158        // rejected (whole-byte OR check on data[pos+4..pos+8]). The frame is 10 bytes
1159        // (4 id + 4 size + 2 flags), so body=10 makes tag_end == len (20): the walk
1160        // is entered (NOT short-circuited by `tag_end > data.len()`) and the high-bit
1161        // check fires.
1162        let mut frame = b"TIT2".to_vec();
1163        frame.extend_from_slice(&[0x80, 0x80, 0x00, 0x00]); // size bytes, two high bits
1164        frame.extend_from_slice(&[0x00, 0x00]); // frame flags
1165        let tag = id3v2(0x04, 0x00, 10, &frame);
1166        assert!(!id3v2_alloc_safe(&tag));
1167    }
1168
1169    /// A valid ID3v2.3 frame: 4-byte id, 4-byte plain big-endian size, 2 flag bytes,
1170    /// then `payload`.
1171    fn v23_frame(id: &[u8; 4], size: u32, payload: &[u8]) -> Vec<u8> {
1172        let mut v = id.to_vec();
1173        v.extend_from_slice(&size.to_be_bytes());
1174        v.extend_from_slice(&[0x00, 0x00]);
1175        v.extend_from_slice(payload);
1176        v
1177    }
1178
1179    #[test]
1180    fn alloc_safe_v22_24bit_size_decode() {
1181        // v2.2 frame header is 6 bytes: 3-byte id + 3-byte 24-bit big-endian size.
1182        // Declare a size that the *correct* decode puts out of bounds (reject), so a
1183        // decode that drops a size byte would wrongly accept.
1184        // size bytes [0x00,0x01,0x00] = 256, body = 6 (header only, no room) -> reject.
1185        let mut f_mid = b"TT2".to_vec();
1186        f_mid.extend_from_slice(&[0x00, 0x01, 0x00]); // 24-bit size = 256
1187        assert!(!id3v2_alloc_safe(&id3v2(0x02, 0x00, 6, &f_mid))); // pins the mid byte
1188        // size bytes [0x01,0x00,0x00] = 65536 -> reject; pins the high byte.
1189        let mut f_hi = b"TT2".to_vec();
1190        f_hi.extend_from_slice(&[0x01, 0x00, 0x00]);
1191        assert!(!id3v2_alloc_safe(&id3v2(0x02, 0x00, 6, &f_hi)));
1192        // size bytes [0x00,0x00,0x10] = 16, body = 6, tag_end = 16,
1193        // data_start = 16. 16 > 16 - 16 = 0 -> reject. Pins the low byte:
1194        // reading the flags byte (0x00) instead gives size 0 -> wrongly accept.
1195        let mut f_lo = b"TT2".to_vec();
1196        f_lo.extend_from_slice(&[0x00, 0x00, 0x10]); // 24-bit size = 16
1197        assert!(!id3v2_alloc_safe(&id3v2(0x02, 0x00, 6, &f_lo)));
1198        // A valid in-bounds v2.2 frame is accepted: size 4, body = 6+4 = 10.
1199        let mut f_ok = b"TT2".to_vec();
1200        f_ok.extend_from_slice(&[0x00, 0x00, 0x04]);
1201        f_ok.extend_from_slice(&[1, 2, 3, 4]);
1202        assert!(id3v2_alloc_safe(&id3v2(0x02, 0x00, 10, &f_ok)));
1203    }
1204
1205    #[test]
1206    fn alloc_safe_rejects_nonzero_frame_flags() {
1207        // v2.3: non-zero frame flags -> reject (the v2.3 flag check).
1208        let mut f3 = b"TIT2".to_vec();
1209        f3.extend_from_slice(&4u32.to_be_bytes()); // plain size 4
1210        f3.extend_from_slice(&[0x00, 0x01]); // non-zero frame flags
1211        f3.extend_from_slice(&[1, 2, 3, 4]);
1212        assert!(!id3v2_alloc_safe(&id3v2(0x03, 0x00, 14, &f3)));
1213
1214        // v2.4: non-zero frame flags -> reject. This is a SEPARATE code path (the
1215        // v2.4 `else` branch) from the v2.3 check, so it needs its own fixture.
1216        let mut f4 = b"TIT2".to_vec();
1217        f4.extend_from_slice(&ss(4)); // valid synchsafe size 4
1218        f4.extend_from_slice(&[0x00, 0x01]); // non-zero frame flags
1219        f4.extend_from_slice(&[1, 2, 3, 4]);
1220        assert!(!id3v2_alloc_safe(&id3v2(0x04, 0x00, 14, &f4)));
1221    }
1222
1223    #[test]
1224    fn alloc_safe_rejects_chap_and_ctoc() {
1225        // CHAP/CTOC carry sub-frames -> recursive OOM vector -> reject (v2.3/2.4).
1226        let chap = v23_frame(b"CHAP", 4, &[1, 2, 3, 4]);
1227        assert!(!id3v2_alloc_safe(&id3v2(0x03, 0x00, 14, &chap)));
1228        let ctoc = v23_frame(b"CTOC", 4, &[1, 2, 3, 4]);
1229        assert!(!id3v2_alloc_safe(&id3v2(0x03, 0x00, 14, &ctoc)));
1230    }
1231
1232    #[test]
1233    fn alloc_safe_frame_size_bounds() {
1234        // Frame exactly filling the body -> accept (size 4, body = 10+4 = 14).
1235        // data_start = 10+10 = 20, tag_end = 24, rem = 4, size 4 -> 4 > 4 is false.
1236        // Kills A `+ -> *` (data_start=100 -> 100>24 -> reject) and C `> -> >=`
1237        // (4 >= 4 -> reject).
1238        let ok = v23_frame(b"TIT2", 4, &[1, 2, 3, 4]);
1239        assert!(id3v2_alloc_safe(&id3v2(0x03, 0x00, 14, &ok)));
1240        // size one byte past the remainder -> reject (size 5: 5 > 24-20=4). Kills C
1241        // `> -> ==` (5==4 false -> accept), C `- -> +` (rem=44 -> 5>44 false ->
1242        // accept), D `|| -> &&` (false && true -> accept), and A `+ -> -`
1243        // (data_start=0 -> 5 > 24-0=24 false -> accept).
1244        let over = v23_frame(b"TIT2", 5, &[1, 2, 3, 4]);
1245        assert!(!id3v2_alloc_safe(&id3v2(0x03, 0x00, 14, &over)));
1246    }
1247
1248    #[test]
1249    fn alloc_safe_data_start_equal_to_tag_end_is_ok() {
1250        // A size-0 frame: data_start (20) == tag_end (20). Original: `20 > 20` is
1251        // false -> accept. Kills B `> -> ==` (20==20 -> reject) and `> -> >=`.
1252        let zero = v23_frame(b"TIT2", 0, &[]);
1253        assert!(id3v2_alloc_safe(&id3v2(0x03, 0x00, 10, &zero)));
1254    }
1255
1256    #[test]
1257    fn alloc_safe_rejects_bad_second_frame_in_body() {
1258        // Valid frame1 (size 2) then an out-of-bounds frame2 (size 100), both inside
1259        // the declared body (body=26, tag_end=36). Original walks to frame2 and
1260        // rejects. Kills E `+ -> *` (pos = 20*2 = 40 >= 36 -> break -> accept,
1261        // skipping frame2) and E `+ -> -` (pos = 20-2 = 18 -> data[18]==0 padding
1262        // break -> accept).
1263        let mut frames = v23_frame(b"TIT2", 2, &[0xAA, 0xBB]); // 12 bytes, 10..22
1264        frames.extend_from_slice(&v23_frame(b"TPE1", 100, &[1, 2, 3, 4])); // 14, 22..36
1265        assert!(!id3v2_alloc_safe(&id3v2(0x03, 0x00, 26, &frames)));
1266    }
1267
1268    #[test]
1269    fn alloc_safe_stops_at_tag_body_end() {
1270        // A size-0 frame fills the body (tag_end=20), then a bad trailing frame
1271        // beyond tag_end but within the buffer. Original breaks at `pos >= tag_end`
1272        // (20 >= 20) and accepts without walking the trailing garbage. Kills F
1273        // `>= -> <` (20 < 20 false -> no break -> walks the bad frame -> reject).
1274        let mut frames = v23_frame(b"TIT2", 0, &[]); // 10 bytes, 10..20
1275        frames.extend_from_slice(&v23_frame(b"TPE1", 100, &[1, 2, 3, 4])); // 14, 20..34
1276        assert!(id3v2_alloc_safe(&id3v2(0x03, 0x00, 10, &frames)));
1277    }
1278
1279    #[test]
1280    fn alloc_safe_walks_two_frames_and_stops_at_padding() {
1281        // Two valid frames (24 bytes, 10..34) then 10 padding zero bytes (34..44).
1282        // body=25 -> tag_end=35, so after frame2 (pos=34) `34 >= 35` is false (no
1283        // tag-end break); the next iteration enters (`34+10=44 <= 44`) and
1284        // `data[34] == 0` triggers the PADDING break. Kills I `== -> !=` (no break ->
1285        // walks zero bytes -> data_start past tag_end -> reject) and exercises the
1286        // multi-frame walk (E) and the while guard (G).
1287        let mut frames = v23_frame(b"TIT2", 2, &[0xAA, 0xBB]);
1288        frames.extend_from_slice(&v23_frame(b"TPE1", 2, &[0xCC, 0xDD]));
1289        frames.extend_from_slice(&[0u8; 10]); // >= header_len of padding so the walk re-enters
1290        assert!(id3v2_alloc_safe(&id3v2(0x03, 0x00, 25, &frames)));
1291    }
1292
1293    #[test]
1294    fn alloc_safe_rejects_frame_size_exceeding_tag_end() {
1295        // Single frame claiming size 100 in a 14-byte body -> reject before any
1296        // allocation. Reinforces C.
1297        let huge = v23_frame(b"TIT2", 100, &[1, 2, 3, 4]);
1298        assert!(!id3v2_alloc_safe(&id3v2(0x03, 0x00, 14, &huge)));
1299    }
1300
1301    /// ID3v2 header declaring `body` bytes of tag, then a frame-sync byte pair,
1302    /// then `audio`. Returns (full, audio_offset).
1303    fn mp3_with_id3v2(body_len: usize, audio: &[u8]) -> (Vec<u8>, u64) {
1304        let mut v = b"ID3\x04\x00\x00".to_vec(); // version 2.4, no flags
1305        v.extend_from_slice(&syncsafe(u32::try_from(body_len).unwrap()));
1306        v.extend(std::iter::repeat_n(0u8, body_len)); // tag body
1307        let audio_offset = v.len() as u64;
1308        v.extend_from_slice(&[0xFF, 0xFB]); // MPEG frame sync
1309        v.extend_from_slice(audio);
1310        (v, audio_offset)
1311    }
1312
1313    #[test]
1314    fn locate_audio_bounded_complete_with_no_id3v1() {
1315        let (full, audio_offset) = mp3_with_id3v2(8, b"frames");
1316        let prefix = &full[..usize_from(audio_offset) + 2]; // covers tag + sync
1317        let file_len = full.len() as u64;
1318        match locate_audio_bounded(prefix, file_len, None).unwrap() {
1319            Extent::Complete(b) => {
1320                assert_eq!(b.audio_offset, audio_offset);
1321                assert_eq!(b.audio_length, file_len - audio_offset);
1322            }
1323            other @ Extent::NeedMore { .. } => panic!("expected Complete, got {other:?}"),
1324        }
1325    }
1326
1327    #[test]
1328    fn locate_audio_bounded_needmore_when_tag_exceeds_prefix() {
1329        let (full, _audio_offset) = mp3_with_id3v2(4096, b"frames");
1330        let prefix = &full[..32]; // only the 10-byte header is present
1331        let file_len = full.len() as u64;
1332        match locate_audio_bounded(prefix, file_len, None).unwrap() {
1333            Extent::NeedMore { up_to } => assert_eq!(up_to, 10 + 4096 + 2),
1334            other @ Extent::Complete(_) => panic!("expected NeedMore, got {other:?}"),
1335        }
1336    }
1337
1338    #[test]
1339    fn locate_audio_bounded_strips_id3v1_tail() {
1340        let (mut full, audio_offset) = mp3_with_id3v2(8, b"frames");
1341        let body_end = full.len();
1342        full.extend_from_slice(b"TAG"); // ID3v1 marker
1343        full.extend(std::iter::repeat_n(0u8, 125)); // 128-byte tag total
1344        let file_len = full.len() as u64;
1345        let tail: [u8; 128] = full[full.len() - 128..].try_into().unwrap();
1346        let prefix = &full[..usize_from(audio_offset) + 2];
1347        match locate_audio_bounded(prefix, file_len, Some(&tail)).unwrap() {
1348            Extent::Complete(b) => {
1349                assert_eq!(b.audio_offset, audio_offset);
1350                assert_eq!(b.audio_length, body_end as u64 - audio_offset);
1351            }
1352            other @ Extent::NeedMore { .. } => panic!("expected Complete, got {other:?}"),
1353        }
1354    }
1355
1356    #[test]
1357    fn locate_audio_bounded_rejects_audio_start_past_eof() {
1358        // An ID3v2 tag whose declared length leaves no room for the frame sync
1359        // (audio_offset == file_len). The bounded prober must fail fast with
1360        // `NotMp3` rather than loop on `NeedMore { up_to > file_len }`.
1361        let mut full = b"ID3\x04\x00\x00".to_vec();
1362        full.extend_from_slice(&syncsafe(8));
1363        full.extend(std::iter::repeat_n(0u8, 8)); // tag body; file ends here
1364        let file_len = full.len() as u64; // == tag end == audio_offset
1365        match locate_audio_bounded(&full, file_len, None) {
1366            Err(FormatError::NotMp3) => {}
1367            other => panic!("expected Err(NotMp3), got {other:?}"),
1368        }
1369    }
1370
1371    // kills mp3 L75 (`prefix.len() >= 10 && &prefix[0..3] == b"ID3"`: `&&`->`||`).
1372    // A long (>=10) prefix that is NOT "ID3" and starts with a valid frame sync.
1373    // Correct (`&&`): the ID3 branch is skipped -> audio_offset stays 0 -> Complete
1374    // at offset 0. Under `||`: `len>=10 || "ID3"==..` is true, so it parses an ID3
1375    // header out of the non-ID3 bytes, computing a bogus tag_len and a wrong
1376    // audio_offset (or Malformed). Asserting audio_offset==0 kills it.
1377    #[test]
1378    fn locate_audio_bounded_plain_mp3_no_id3_starts_at_zero() {
1379        // 0xFF 0xFB frame sync at offset 0, then payload. len 12 (>= 10).
1380        let data = [0xFF, 0xFB, 0x90, 0x00, 1, 2, 3, 4, 5, 6, 7, 8];
1381        let file_len = data.len() as u64;
1382        match locate_audio_bounded(&data, file_len, None).unwrap() {
1383            Extent::Complete(b) => {
1384                assert_eq!(b.audio_offset, 0);
1385                assert_eq!(b.audio_length, file_len);
1386            }
1387            other @ Extent::NeedMore { .. } => {
1388                panic!("expected Complete at offset 0, got {other:?}")
1389            }
1390        }
1391    }
1392
1393    // Reinforces L75 with a short non-ID3 prefix below the ID3-header length.
1394    // A 5-byte prefix that is not "ID3", file_len < 10. Correct (`&&`): the ID3
1395    // branch is false (len 5 < 10) AND the else-if at L86 is `len<10 && file_len>=10`
1396    // = `true && false` = false, so it proceeds; the frame sync at offset 0 is in
1397    // the prefix -> Complete. Under L75 `||`: `5>=10 || "ID3"==prefix[0..3]` ->
1398    // false || false is still false here, BUT the point is the `&&`->`||` mutant on
1399    // a len>=10 non-ID3 prefix (covered above). This case pins that a short non-ID3
1400    // prefix with a valid sync resolves to Complete (no panic indexing prefix[5..]).
1401    #[test]
1402    fn locate_audio_bounded_short_non_id3_with_small_file() {
1403        // 0xFF 0xFB sync at offset 0; file_len 5 (< 10).
1404        let data = [0xFF, 0xFB, 0x90, 0x00, 0x00];
1405        let file_len = data.len() as u64; // 5
1406        match locate_audio_bounded(&data, file_len, None).unwrap() {
1407            Extent::Complete(b) => {
1408                assert_eq!(b.audio_offset, 0);
1409                assert_eq!(b.audio_length, 5);
1410            }
1411            other @ Extent::NeedMore { .. } => panic!("expected Complete, got {other:?}"),
1412        }
1413    }
1414
1415    // kills mp3 L80 (footer `tag_len += 10`: `+=`->`-=`,`*=`).
1416    // ID3v2.4 tag WITH the footer flag (0x10) and a known body. tag_len must be
1417    // 10 (header) + body + 10 (footer). With body=6, audio_offset must be 26.
1418    // `-=` gives 10+6-10 = 6; `*=` gives (10+6)*10 = 160 (> file_len -> Malformed).
1419    // Frame sync is placed at offset 26 so the correct path returns Complete.
1420    #[test]
1421    fn locate_audio_bounded_footer_flag_adds_ten() {
1422        let body = 6usize;
1423        let mut full = b"ID3\x04\x00".to_vec();
1424        full.push(0x10); // flags: footer present
1425        full.extend_from_slice(&syncsafe(u32::try_from(body).unwrap()));
1426        full.extend(std::iter::repeat_n(0u8, body)); // tag body
1427        full.extend(std::iter::repeat_n(0u8, 10)); // footer region
1428        let expected_offset = full.len() as u64; // 10 + 6 + 10 = 26
1429        full.extend_from_slice(&[0xFF, 0xFB]); // frame sync at offset 26
1430        full.extend_from_slice(b"audio");
1431        let file_len = full.len() as u64;
1432        match locate_audio_bounded(&full, file_len, None).unwrap() {
1433            Extent::Complete(b) => {
1434                assert_eq!(b.audio_offset, 26);
1435                assert_eq!(b.audio_offset, expected_offset);
1436                assert_eq!(b.audio_length, file_len - 26);
1437            }
1438            other @ Extent::NeedMore { .. } => {
1439                panic!("expected Complete at offset 26, got {other:?}")
1440            }
1441        }
1442    }
1443
1444    // kills mp3 L82 (`tag_len as u64 > file_len`: `>`->`==`,`>=`).
1445    // Construct a tag where tag_len == file_len EXACTLY (no room for audio).
1446    // Correct (`>`): `tag_len > file_len` is false -> proceeds; then the L96
1447    // `audio_offset + 2 > file_len` check fires (audio_offset == file_len) ->
1448    // Err(NotMp3). Under `==`/`>=`: `tag_len == file_len` true -> early
1449    // Err(Malformed). Asserting NotMp3 (not Malformed) kills both.
1450    #[test]
1451    fn locate_audio_bounded_tag_len_equals_file_len_is_notmp3_not_malformed() {
1452        let body = 8usize;
1453        let mut full = b"ID3\x04\x00\x00".to_vec();
1454        full.extend_from_slice(&syncsafe(u32::try_from(body).unwrap()));
1455        full.extend(std::iter::repeat_n(0u8, body)); // file ends exactly at tag end
1456        let file_len = full.len() as u64; // == tag_len == audio_offset (18)
1457        match locate_audio_bounded(&full, file_len, None) {
1458            Err(FormatError::NotMp3) => {}
1459            other => panic!("expected Err(NotMp3) for tag_len==file_len, got {other:?}"),
1460        }
1461    }
1462
1463    // kills mp3 L82 true branch (`>`): a tag declaring more than the file holds
1464    // must be Malformed. Pins that the `>` branch is reachable and returns
1465    // Malformed (so the `>`->`==`/`>=` mutants, which change WHICH side is taken,
1466    // are distinguished from the equals case above).
1467    #[test]
1468    fn locate_audio_bounded_tag_len_exceeds_file_len_is_malformed() {
1469        // Declare body=100 but provide a tiny file. tag_len = 110 > file_len.
1470        let mut full = b"ID3\x04\x00\x00".to_vec();
1471        full.extend_from_slice(&syncsafe(100));
1472        full.extend_from_slice(&[0xFF, 0xFB]); // some bytes, but file is short
1473        let file_len = full.len() as u64; // 12, << 110
1474        match locate_audio_bounded(&full, file_len, None) {
1475            Err(FormatError::Malformed) => {}
1476            other => panic!("expected Err(Malformed), got {other:?}"),
1477        }
1478    }
1479
1480    // kills mp3 L86 (`prefix.len() < 10 && file_len >= 10`: the NeedMore{up_to:10}
1481    // else-if). Short non-ID3 prefix (len 5) with file_len >= 10. Correct: `5 < 10
1482    // && 10 >= 10` = true -> NeedMore{up_to:10} (we cannot even read the ID3 header).
1483    // `&&`->`||` keeps it true here; the distinguishing variants are below.
1484    #[test]
1485    fn locate_audio_bounded_short_prefix_large_file_needs_header() {
1486        let prefix = [0x00, 0x00, 0x00, 0x00, 0x00]; // 5 bytes, not "ID3"
1487        let file_len = 64u64; // >= 10
1488        match locate_audio_bounded(&prefix, file_len, None).unwrap() {
1489            Extent::NeedMore { up_to } => assert_eq!(up_to, 10),
1490            other @ Extent::Complete(_) => panic!("expected NeedMore{{up_to:10}}, got {other:?}"),
1491        }
1492    }
1493
1494    // kills mp3 L86 `<`->`<=` (and `<`->`==`): boundary prefix.len()==10.
1495    // A 10-byte non-ID3 prefix with file_len >= 10. Correct (`<`): `10 < 10` is
1496    // false -> does NOT take the NeedMore-header branch -> proceeds. The first two
1497    // prefix bytes are a valid frame sync, so audio at offset 0 resolves Complete.
1498    // Under `<=`: `10 <= 10` true -> wrongly NeedMore{up_to:10}. Under `==`:
1499    // `10 == 10` true -> wrongly NeedMore. Asserting Complete kills both.
1500    #[test]
1501    fn locate_audio_bounded_prefix_len_exactly_ten_proceeds() {
1502        // 10 bytes, not "ID3", frame sync at offset 0.
1503        let prefix = [0xFF, 0xFB, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
1504        let file_len = 64u64; // >= 10, audio extends to file_len
1505        match locate_audio_bounded(&prefix, file_len, None).unwrap() {
1506            Extent::Complete(b) => {
1507                assert_eq!(b.audio_offset, 0);
1508                assert_eq!(b.audio_length, file_len);
1509            }
1510            other @ Extent::NeedMore { .. } => {
1511                panic!("expected Complete (10<10 false), got {other:?}")
1512            }
1513        }
1514    }
1515
1516    // kills mp3 L86 `>=`->`<` on file_len (and helps `&&`->`||`). Short non-ID3
1517    // prefix (len 5) with file_len < 10 (file_len=8). Correct (`>=`): `5 < 10 &&
1518    // 8 >= 10` = `true && false` = false -> does NOT NeedMore -> proceeds; sync at
1519    // offset 0 is in the prefix -> Complete with audio_length 8. Under `>=`->`<`:
1520    // `8 < 10` true -> `true && true` -> wrongly NeedMore{up_to:10}. Under `&&`->
1521    // `||`: `true || false` -> true -> wrongly NeedMore. Asserting Complete kills
1522    // both the `>=`->`<` and the `&&`->`||` mutants.
1523    #[test]
1524    fn locate_audio_bounded_short_prefix_small_file_proceeds() {
1525        let data = [0xFF, 0xFB, 0x90, 0x00, 0x00]; // len 5, file_len 8 -> but prefix==file here
1526        // Make file_len 8 with the same 5-byte prefix window; the sync pair (2 bytes)
1527        // is inside the prefix, so it resolves without needing more.
1528        let file_len = 8u64;
1529        match locate_audio_bounded(&data, file_len, None).unwrap() {
1530            Extent::Complete(b) => {
1531                assert_eq!(b.audio_offset, 0);
1532                assert_eq!(b.audio_length, 8);
1533            }
1534            other @ Extent::NeedMore { .. } => {
1535                panic!("expected Complete (file_len<10), got {other:?}")
1536            }
1537        }
1538    }
1539
1540    // kills mp3 L96 (`audio_offset as u64 + 2 > file_len`: `+`->`-`).
1541    // Build a real ID3v2 tag so audio_offset > 0, with the audio start placed
1542    // JUST past EOF: audio_offset + 2 == file_len + 1 (i.e. audio_offset ==
1543    // file_len - 1). Correct (`+`): `audio_offset + 2 > file_len` -> true ->
1544    // Err(NotMp3). Under `-`: `audio_offset - 2 > file_len` -> false (since
1545    // audio_offset < file_len) -> proceeds -> would read past EOF / wrong answer.
1546    #[test]
1547    fn locate_audio_bounded_sync_one_byte_past_eof_is_notmp3() {
1548        let body = 4usize;
1549        let mut full = b"ID3\x04\x00\x00".to_vec();
1550        full.extend_from_slice(&syncsafe(u32::try_from(body).unwrap()));
1551        full.extend(std::iter::repeat_n(0u8, body)); // tag end at offset 14
1552        let audio_offset = full.len() as u64; // 14
1553        full.push(0xFF); // a single sync byte present (so prefix has audio_offset+1)
1554        // file_len = audio_offset + 1, so audio_offset + 2 == file_len + 1 (just past).
1555        let file_len = audio_offset + 1; // 15
1556        match locate_audio_bounded(&full, file_len, None) {
1557            Err(FormatError::NotMp3) => {}
1558            other => panic!("expected Err(NotMp3) (sync past EOF), got {other:?}"),
1559        }
1560    }
1561
1562    // Complement to L96: audio_offset + 2 <= file_len must proceed (not reject).
1563    // Pins that the `>` comparison's false branch is reachable; with `+`->`-` the
1564    // earlier case flips, so this guards the true semantics of "+2 fits".
1565    #[test]
1566    fn locate_audio_bounded_sync_fits_in_file_proceeds() {
1567        let (full, audio_offset) = mp3_with_id3v2(4, b"frames");
1568        let file_len = full.len() as u64; // audio_offset + 2 + 6
1569        match locate_audio_bounded(&full, file_len, None).unwrap() {
1570            Extent::Complete(b) => assert_eq!(b.audio_offset, audio_offset),
1571            other @ Extent::NeedMore { .. } => panic!("expected Complete, got {other:?}"),
1572        }
1573    }
1574
1575    #[test]
1576    fn locate_audio_bounded_sync_exactly_at_eof_proceeds() {
1577        // Boundary: audio_offset + 2 == file_len exactly (audio is just the 2-byte
1578        // frame sync). `audio_offset + 2 > file_len` is false -> Complete. The
1579        // `>`->`>=` mutant makes `16 >= 16` true -> wrongly Err(NotMp3). Mirrors the
1580        // unbounded reject `audio_offset + 1 >= len` (accepts when +2 <= len).
1581        let body = 4usize;
1582        let mut full = b"ID3\x04\x00\x00".to_vec();
1583        full.extend_from_slice(&syncsafe(u32::try_from(body).unwrap()));
1584        full.extend(std::iter::repeat_n(0u8, body)); // tag end at offset 14
1585        let audio_offset = full.len() as u64; // 14
1586        full.push(0xFF); // frame sync pair, and nothing after
1587        full.push(0xFB);
1588        let file_len = full.len() as u64; // 16 == audio_offset + 2
1589        // kills mp3 L96 `>`->`>=`: equal-fit audio must be accepted, not rejected.
1590        match locate_audio_bounded(&full, file_len, None).unwrap() {
1591            Extent::Complete(b) => {
1592                assert_eq!(b.audio_offset, audio_offset);
1593                assert_eq!(b.audio_length, 2);
1594            }
1595            other @ Extent::NeedMore { .. } => {
1596                panic!("expected Complete (exact fit), got {other:?}")
1597            }
1598        }
1599    }
1600
1601    // kills mp3 L107 (`prefix[audio_offset] != 0xFF || (prefix[audio_offset+1] &
1602    // 0xE0) != 0xE0`): `||`->`&&` and `+`->`*`.
1603    // Frame-sync byte 0 is 0xFF but byte 1 lacks the 0xE0 sync bits. Correct
1604    // (`||`): first operand false, second true -> reject NotMp3. Under `&&`:
1605    // `false && true` -> accept (wrong) -> would return Complete. The `+`->`*` on
1606    // `audio_offset + 1`: with audio_offset==0, `0*1 == 0` reads byte 0 (0xFF)
1607    // instead of byte 1, so `(0xFF & 0xE0) != 0xE0` is false -> with `||` short of
1608    // first-operand-false the decision changes; pairing distinct bytes makes the
1609    // sync verdict observable.
1610    #[test]
1611    fn locate_audio_bounded_rejects_bad_second_sync_byte() {
1612        // byte0 = 0xFF (passes first half), byte1 = 0x00 (fails the 0xE0 check).
1613        let data = [
1614            0xFF, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1615        ];
1616        let file_len = data.len() as u64;
1617        match locate_audio_bounded(&data, file_len, None) {
1618            Err(FormatError::NotMp3) => {}
1619            other => panic!("expected Err(NotMp3) (bad sync byte 1), got {other:?}"),
1620        }
1621    }
1622
1623    // Reinforces L107 `+`->`*` at a NON-zero audio_offset so `audio_offset + 1`
1624    // and `audio_offset * 1` differ. With an ID3 tag pushing audio_offset to 14:
1625    // byte[14] = 0xFF (good), byte[15] = 0x00 (bad second byte). Correct reads
1626    // byte[15] -> reject NotMp3. Under `+`->`*`: `14 * 1 == 14` reads byte[14]
1627    // (0xFF) again -> `(0xFF & 0xE0)==0xE0` so the second test passes -> accept
1628    // (wrong). Asserting NotMp3 kills `+`->`*`.
1629    #[test]
1630    fn locate_audio_bounded_rejects_bad_second_sync_byte_after_id3() {
1631        let body = 4usize;
1632        let mut full = b"ID3\x04\x00\x00".to_vec();
1633        full.extend_from_slice(&syncsafe(u32::try_from(body).unwrap()));
1634        full.extend(std::iter::repeat_n(0u8, body)); // audio_offset = 14
1635        full.extend_from_slice(&[0xFF, 0x00]); // byte14=0xFF good, byte15=0x00 bad
1636        full.extend_from_slice(b"tail");
1637        let file_len = full.len() as u64;
1638        match locate_audio_bounded(&full, file_len, None) {
1639            Err(FormatError::NotMp3) => {}
1640            other => panic!("expected Err(NotMp3) (bad sync at 15), got {other:?}"),
1641        }
1642    }
1643
1644    // kills mp3 L101 frame-sync NeedMore (`audio_offset + 2 > prefix.len()`).
1645    // A tag whose audio_offset is inside file_len, but the prefix is shorter than
1646    // audio_offset + 2 (the sync pair is past the prefix window). Correct: returns
1647    // NeedMore{up_to: audio_offset + 2}. A `+`->`*` (audio_offset*2) or a flipped
1648    // comparison changes up_to. Here audio_offset=14, so up_to must be 16; prefix
1649    // is only 15 bytes (one short of the sync pair).
1650    #[test]
1651    fn locate_audio_bounded_needmore_for_sync_past_prefix() {
1652        let body = 4usize;
1653        let mut full = b"ID3\x04\x00\x00".to_vec();
1654        full.extend_from_slice(&syncsafe(u32::try_from(body).unwrap()));
1655        full.extend(std::iter::repeat_n(0u8, body)); // audio_offset = 14
1656        full.extend_from_slice(&[0xFF, 0xFB]); // sync at 14..16
1657        full.extend_from_slice(b"more audio bytes here");
1658        let file_len = full.len() as u64; // plenty of room
1659        let prefix = &full[..15]; // 14-byte tag + only 1 of the 2 sync bytes
1660        match locate_audio_bounded(prefix, file_len, None).unwrap() {
1661            Extent::NeedMore { up_to } => assert_eq!(up_to, 16), // audio_offset(14) + 2
1662            other @ Extent::Complete(_) => panic!("expected NeedMore{{up_to:16}}, got {other:?}"),
1663        }
1664    }
1665
1666    // kills mp3 L113 (`file_len >= audio_offset + 128 && &tail[0..3] == b"TAG"`:
1667    // `&&`->`||`) — the TRIM case. A valid MP3 with a "TAG"-prefixed tail and
1668    // file_len >= audio_offset + 128. Correct: trim -> audio_length = file_len -
1669    // audio_offset - 128. (The complement no-trim case is below; together they pin
1670    // the `&&`.)
1671    #[test]
1672    fn locate_audio_bounded_trims_id3v1_when_tag_and_room() {
1673        let (mut full, audio_offset) = mp3_with_id3v2(8, b"frames");
1674        let body_end = full.len();
1675        full.extend_from_slice(b"TAG");
1676        full.extend(std::iter::repeat_n(0u8, 125)); // 128-byte ID3v1 trailer
1677        let file_len = full.len() as u64;
1678        assert!(file_len >= audio_offset + 128); // both conditions true
1679        let tail: [u8; 128] = full[full.len() - 128..].try_into().unwrap();
1680        let prefix = &full[..usize_from(audio_offset) + 2];
1681        match locate_audio_bounded(prefix, file_len, Some(&tail)).unwrap() {
1682            Extent::Complete(b) => {
1683                assert_eq!(b.audio_offset, audio_offset);
1684                // kills mp3 L113: trimmed length excludes the 128-byte ID3v1 tail.
1685                assert_eq!(b.audio_length, file_len - audio_offset - 128);
1686                assert_eq!(b.audio_length, body_end as u64 - audio_offset);
1687            }
1688            other @ Extent::NeedMore { .. } => panic!("expected Complete (trimmed), got {other:?}"),
1689        }
1690    }
1691
1692    // kills mp3 L113 (`&&`->`||`) — the NO-TRIM case. file_len >= audio_offset+128
1693    // is TRUE, but the tail does NOT start with "TAG". Correct (`&&`): second
1694    // operand false -> no trim -> audio_length == file_len - audio_offset. Under
1695    // `||`: first operand true -> trims 128 wrongly -> shorter length. Asserting
1696    // the un-trimmed length kills the `||` mutant.
1697    #[test]
1698    fn locate_audio_bounded_no_trim_when_tail_not_tag() {
1699        let (mut full, audio_offset) = mp3_with_id3v2(8, b"frames");
1700        // Pad with enough non-"TAG" trailing bytes so file_len >= audio_offset+128.
1701        full.extend(std::iter::repeat_n(0u8, 200));
1702        let file_len = full.len() as u64;
1703        assert!(file_len >= audio_offset + 128); // first operand TRUE
1704        let tail: [u8; 128] = full[full.len() - 128..].try_into().unwrap();
1705        assert_ne!(&tail[0..3], b"TAG"); // second operand FALSE
1706        let prefix = &full[..usize_from(audio_offset) + 2];
1707        match locate_audio_bounded(prefix, file_len, Some(&tail)).unwrap() {
1708            Extent::Complete(b) => {
1709                assert_eq!(b.audio_offset, audio_offset);
1710                // No trim: full audio length from offset to EOF.
1711                assert_eq!(b.audio_length, file_len - audio_offset);
1712            }
1713            other @ Extent::NeedMore { .. } => panic!("expected Complete (no trim), got {other:?}"),
1714        }
1715    }
1716
1717    // Complement to L113 first-operand: tail starts with "TAG" but file_len <
1718    // audio_offset + 128 (no room for a real ID3v1). Correct (`&&`): first operand
1719    // false -> no trim. Under `||`: second operand true -> trims 128 even though
1720    // file_len < audio_offset + 128, which would underflow / shorten wrongly.
1721    // Asserting the un-trimmed length pins the first operand of the `&&`.
1722    #[test]
1723    fn locate_audio_bounded_no_trim_when_no_room_even_with_tag_tail() {
1724        let (mut full, audio_offset) = mp3_with_id3v2(8, b"frames");
1725        // Short file: append a "TAG"-prefixed tail but keep file_len < offset+128.
1726        full.extend_from_slice(b"TAGxx"); // tail-ish marker, but file stays short
1727        let file_len = full.len() as u64;
1728        assert!(file_len < audio_offset + 128); // first operand FALSE
1729        // Build a 128-byte tail buffer that starts with "TAG" (the function only
1730        // looks at tail[0..3]); file_len is the real gate here.
1731        let mut tail = [0u8; 128];
1732        tail[0..3].copy_from_slice(b"TAG");
1733        let prefix = &full[..usize_from(audio_offset) + 2];
1734        match locate_audio_bounded(prefix, file_len, Some(&tail)).unwrap() {
1735            Extent::Complete(b) => {
1736                assert_eq!(b.audio_offset, audio_offset);
1737                assert_eq!(b.audio_length, file_len - audio_offset); // no trim
1738            }
1739            other @ Extent::NeedMore { .. } => {
1740                panic!("expected Complete (no room, no trim), got {other:?}")
1741            }
1742        }
1743    }
1744
1745    /// Build a minimal ID3v2.4 tag containing the given frames, with header
1746    /// flags=0 (no unsync, no extended header) and per-frame flags=0 so
1747    /// `id3v2_alloc_safe` accepts it. Used by `read_binary_tags` tests that
1748    /// need a tag without going through the `id3` crate's encoder (which would
1749    /// re-encode `Unknown` bodies and defeat the byte-exact property).
1750    fn build_v24_tag(frames: &[(&[u8; 4], &[u8])]) -> Vec<u8> {
1751        let total_body: usize = frames.iter().map(|(_, b)| 10 + b.len()).sum();
1752        let mut out = Vec::new();
1753        out.extend_from_slice(b"ID3");
1754        out.extend_from_slice(&[0x04, 0x00, 0x00]); // v2.4.0, no flags
1755        out.extend_from_slice(&ss(u32::try_from(total_body).unwrap()));
1756        for (id, body) in frames {
1757            out.extend_from_slice(*id);
1758            out.extend_from_slice(&ss(u32::try_from(body.len()).unwrap()));
1759            out.extend_from_slice(&[0x00, 0x00]); // frame flags
1760            out.extend_from_slice(body);
1761        }
1762        out
1763    }
1764
1765    /// Like `build_v24_tag` but emits an ID3v2.3 tag: frame sizes are plain
1766    /// 32-bit big-endian (not synchsafe). Exercises the `data[3] == 3` size-decode
1767    /// branch of `read_binary_tags`, which no v2.4 fixture can reach.
1768    fn build_v23_tag(frames: &[(&[u8; 4], &[u8])]) -> Vec<u8> {
1769        let total_body: usize = frames.iter().map(|(_, b)| 10 + b.len()).sum();
1770        let mut out = Vec::new();
1771        out.extend_from_slice(b"ID3");
1772        out.extend_from_slice(&[0x03, 0x00, 0x00]); // v2.3.0, no flags
1773        out.extend_from_slice(&ss(u32::try_from(total_body).unwrap())); // tag size is synchsafe in every version
1774        for (id, body) in frames {
1775            out.extend_from_slice(*id);
1776            out.extend_from_slice(&(u32::try_from(body.len()).unwrap()).to_be_bytes()); // v2.3: plain u32 frame size
1777            out.extend_from_slice(&[0x00, 0x00]); // frame flags
1778            out.extend_from_slice(body);
1779        }
1780        out
1781    }
1782
1783    // Documented EQUIVALENT mutant (no test can kill it):
1784    //  * classify_binary_frame counter fold `(a << 8) | b` -> `(a << 8) ^ b`.
1785    //    The accumulator is left-shifted by 8 before each combine, so its low
1786    //    byte is always zero where `b` lands; OR and XOR are bit-for-bit identical
1787    //    for every input. Confirmed by hand; left as-is.
1788
1789    #[test]
1790    fn read_binary_tags_v23_plain_u32_frame_size() {
1791        // Two v2.3 frames: a non-zero filler followed by a >=128-byte PRIV. The
1792        // filler's trailing bytes sit just before the PRIV header, so every byte of
1793        // the plain-u32 size field (data[pos+4..pos+8], the `data[3] == 3` branch)
1794        // reads a distinct non-zero value — a wrong size-byte offset (e.g. `pos + 4`
1795        // -> `pos - 4`) then decodes a bogus size and drops/corrupts the PRIV, and a
1796        // synchsafe misdecode (the `== 3` branch flipped) truncates it. Both frames
1797        // must survive byte-exact.
1798        let filler = vec![0xAAu8; 8];
1799        let body: Vec<u8> = (0..200u32)
1800            .map(|i| u8::try_from(i % 250 + 1).unwrap())
1801            .collect();
1802        let tag = build_v23_tag(&[(b"GEOB", &filler), (b"PRIV", &body)]);
1803        let (opaque, _promoted) = super::read_binary_tags(&tag);
1804        let geob = opaque
1805            .iter()
1806            .find(|e| e.key == "GEOB")
1807            .expect("v2.3 GEOB preserved");
1808        assert_eq!(
1809            geob.payload, filler,
1810            "v2.3 first frame must survive byte-exact"
1811        );
1812        let priv_frame = opaque
1813            .iter()
1814            .find(|e| e.key == "PRIV")
1815            .expect("v2.3 PRIV preserved");
1816        assert_eq!(
1817            priv_frame.payload, body,
1818            "v2.3 plain-u32 frame body must survive byte-exact"
1819        );
1820    }
1821
1822    #[test]
1823    fn read_binary_tags_skips_unsafe_tag() {
1824        // A well-formed v2.4 PRIV tag with the unsynchronisation flag forced on:
1825        // id3v2_alloc_safe rejects it, so read_binary_tags must yield nothing. The
1826        // major version stays >= 3, so the `!alloc_safe || major<3` guard hinges on
1827        // the `||` (an `&&` mutant would parse the rejected tag).
1828        let mut tag = build_v24_tag(&[(b"PRIV", &[1, 2, 3])]);
1829        tag[5] = 0x80; // unsynchronisation flag
1830        let (opaque, promoted) = super::read_binary_tags(&tag);
1831        assert!(
1832            opaque.is_empty() && promoted.is_empty(),
1833            "an alloc-unsafe tag must yield no binary frames"
1834        );
1835    }
1836
1837    #[test]
1838    fn read_binary_tags_skips_text_comm_uslt_apic() {
1839        // T***/COMM/USLT/APIC are handled by read_tags/read_pictures and must NOT
1840        // be captured as opaque binary frames; only PRIV is.
1841        let tag = build_v24_tag(&[
1842            (b"TIT2", &[0x00, b'x']),
1843            (b"COMM", &[0x00]),
1844            (b"USLT", &[0x00]),
1845            (b"APIC", &[0x00]),
1846            (b"PRIV", &[9, 9, 9]),
1847        ]);
1848        let (opaque, _promoted) = super::read_binary_tags(&tag);
1849        let keys: Vec<&str> = opaque.iter().map(|e| e.key.as_str()).collect();
1850        assert_eq!(
1851            keys,
1852            vec!["PRIV"],
1853            "only PRIV is opaque; T***/COMM/USLT/APIC are handled elsewhere: {keys:?}"
1854        );
1855    }
1856
1857    #[test]
1858    fn read_binary_tags_decodes_popm_counter_big_endian_and_zero() {
1859        // Multi-byte counter must decode big-endian (0x0102 == 258), pinning the
1860        // `<< 8` shift in the fold.
1861        let tag = build_v24_tag(&[(b"POPM", &[0x00, 200, 0x01, 0x02])]);
1862        let (_opaque, promoted) = super::read_binary_tags(&tag);
1863        assert!(
1864            promoted.contains(&("rating".to_string(), "200".to_string())),
1865            "rating: {promoted:?}"
1866        );
1867        assert!(
1868            promoted.contains(&("playcount".to_string(), "258".to_string())),
1869            "counter must decode big-endian: {promoted:?}"
1870        );
1871
1872        // A zero counter must NOT promote a playcount (pins `c > 0`).
1873        let tag0 = build_v24_tag(&[(b"POPM", &[0x00, 128, 0x00])]);
1874        let (_o0, promoted0) = super::read_binary_tags(&tag0);
1875        assert!(
1876            promoted0.contains(&("rating".to_string(), "128".to_string())),
1877            "rating: {promoted0:?}"
1878        );
1879        assert!(
1880            !promoted0.iter().any(|(k, _)| k == "playcount"),
1881            "a zero POPM counter must not promote playcount: {promoted0:?}"
1882        );
1883    }
1884
1885    #[test]
1886    fn popm_frame_data_emits_counter_only_when_positive() {
1887        // playcount == 0: owner-nul + rating, no counter.
1888        assert_eq!(
1889            super::popm_frame_data(200, 0),
1890            vec![0x00, 200],
1891            "playcount 0 must omit the counter"
1892        );
1893        // playcount > 0: 4-byte big-endian counter appended.
1894        assert_eq!(
1895            super::popm_frame_data(200, 5),
1896            vec![0x00, 200, 0x00, 0x00, 0x00, 0x05],
1897            "playcount > 0 must append a 4-byte counter"
1898        );
1899    }
1900
1901    #[test]
1902    fn build_id3v2_segments_accounts_playcount_and_opaque_len() {
1903        use crate::{BinaryTagInput, TagInput};
1904
1905        // playcount text tag must rebuild into the POPM counter (pins the
1906        // `"playcount"` match arm).
1907        let tags = vec![
1908            TagInput::new("rating", "100"),
1909            TagInput::new("playcount", "42"),
1910        ];
1911        let (segments, _len) = build_id3v2_segments(&tags, &[], &[]).unwrap();
1912        let inline: Vec<u8> = segments
1913            .iter()
1914            .flat_map(|s| match s {
1915                Segment::Inline(b) => b.clone(),
1916                _ => Vec::new(),
1917            })
1918            .collect();
1919        let (_opaque, promoted) = super::read_binary_tags(&inline);
1920        assert!(
1921            promoted.contains(&("playcount".to_string(), "42".to_string())),
1922            "playcount must rebuild into the POPM counter: {promoted:?}"
1923        );
1924
1925        // Opaque-frame length accounting: total == ID3 header (10) + frame header
1926        // (10) + body. Pins `frames_len += 10 + bt.len`.
1927        let bin = vec![BinaryTagInput {
1928            key: "PRIV".into(),
1929            payload_id: 1,
1930            len: BlobLen::new(7).unwrap(),
1931        }];
1932        let (_segs, total) = build_id3v2_segments(&[], &bin, &[]).unwrap();
1933        assert_eq!(total, 10 + 10 + 7, "opaque binary frame length accounting");
1934    }
1935
1936    #[test]
1937    fn read_binary_tags_promotes_popm_and_mbid_and_passes_through_priv() {
1938        use id3::frame::{Content, Popularimeter, UniqueFileIdentifier, Unknown};
1939        use id3::{Encoder, Frame, Tag, TagLike, Version};
1940
1941        let mut tag = Tag::new();
1942        tag.add_frame(Popularimeter {
1943            user: "a@b.c".into(),
1944            rating: 200,
1945            counter: 7,
1946        });
1947        tag.add_frame(UniqueFileIdentifier {
1948            owner_identifier: "http://musicbrainz.org".into(),
1949            identifier: b"mbid-123".to_vec(),
1950        });
1951        tag.add_frame(UniqueFileIdentifier {
1952            owner_identifier: "http://other.example".into(),
1953            identifier: b"other".to_vec(),
1954        });
1955        tag.add_frame(Frame::with_content(
1956            "PRIV",
1957            Content::Unknown(Unknown {
1958                data: vec![9, 8, 7],
1959                version: Version::Id3v24,
1960            }),
1961        ));
1962        let mut buf = Vec::new();
1963        Encoder::new()
1964            .version(Version::Id3v24)
1965            .encode(&tag, &mut buf)
1966            .unwrap();
1967
1968        let (opaque, promoted) = super::read_binary_tags(&buf);
1969        assert!(promoted.contains(&("rating".to_string(), "200".to_string())));
1970        assert!(promoted.contains(&("playcount".to_string(), "7".to_string())));
1971        assert!(promoted.contains(&("musicbrainz_trackid".to_string(), "mbid-123".to_string())));
1972        let keys: Vec<&str> = opaque.iter().map(|e| e.key.as_str()).collect();
1973        assert!(keys.contains(&"PRIV"));
1974        // Non-MusicBrainz UFID is opaque (raw body, owner + identifier); exactly one UFID.
1975        assert_eq!(keys.iter().filter(|k| **k == "UFID").count(), 1);
1976        assert_eq!(
1977            opaque.iter().find(|e| e.key == "PRIV").unwrap().payload,
1978            vec![9, 8, 7]
1979        );
1980    }
1981
1982    #[test]
1983    fn read_binary_tags_preserves_geob_body_byte_exact() {
1984        // A GEOB body with a Latin1 (encoding 0x00) description — the exact case
1985        // the crate's to_unknown() would re-encode to UTF-8. Build a minimal v2.4
1986        // tag by hand so the bytes on the wire are guaranteed to match the
1987        // asserted body.
1988        let geob_body: Vec<u8> = {
1989            let mut b = vec![0x00]; // text encoding: ISO-8859-1
1990            b.extend_from_slice(b"application/octet-stream\0"); // mime
1991            b.extend_from_slice(b"Serato Overview\0"); // filename (latin1)
1992            b.extend_from_slice(b"\0"); // description (empty, terminator only)
1993            b.extend_from_slice(&[0xDE, 0xAD, 0xBE, 0xEF]); // object data
1994            b
1995        };
1996        let tag = build_v24_tag(&[(b"GEOB", &geob_body)]);
1997
1998        let (opaque, _promoted) = super::read_binary_tags(&tag);
1999        let geob = opaque
2000            .iter()
2001            .find(|e| e.key == "GEOB")
2002            .expect("GEOB preserved");
2003        assert_eq!(
2004            geob.payload, geob_body,
2005            "GEOB body must survive byte-identical"
2006        );
2007    }
2008
2009    #[test]
2010    fn build_id3v2_segments_rebuilds_popm_ufid_and_streams_opaque() {
2011        use crate::BinaryTagInput;
2012        let tags = vec![
2013            TagInput::new("artist", "A"),
2014            TagInput::new("rating", "200"),
2015            TagInput::new("playcount", "7"),
2016            TagInput::new("musicbrainz_trackid", "mbid-123"),
2017        ];
2018        let bin = vec![BinaryTagInput {
2019            key: "PRIV".into(),
2020            payload_id: 42,
2021            len: BlobLen::new(3).unwrap(),
2022        }];
2023        let (segments, _len) = super::build_id3v2_segments(&tags, &bin, &[]).unwrap();
2024
2025        assert!(
2026            segments.iter().any(|s| matches!(
2027                s,
2028                Segment::BinaryTag {
2029                    payload_id: 42,
2030                    len,
2031                    ..
2032                } if len.get() == 3
2033            )),
2034            "opaque PRIV must stream as Segment::BinaryTag"
2035        );
2036
2037        let inline: Vec<u8> = segments
2038            .iter()
2039            .flat_map(|s| match s {
2040                Segment::Inline(b) => b.clone(),
2041                _ => Vec::new(),
2042            })
2043            .collect();
2044        assert!(find_sub(&inline, b"POPM"), "POPM not rebuilt");
2045        assert!(find_sub(&inline, b"UFID"), "UFID not rebuilt");
2046        assert!(
2047            find_sub(&inline, b"http://musicbrainz.org"),
2048            "UFID owner missing"
2049        );
2050        assert!(!find_sub(&inline, b"rating"), "promoted key leaked as TXXX");
2051        assert!(
2052            !find_sub(&inline, b"musicbrainz_trackid"),
2053            "promoted key leaked as TXXX"
2054        );
2055    }
2056
2057    #[test]
2058    fn build_id3v2_segments_first_promoted_scalar_wins() {
2059        // Duplicate `rating`/`musicbrainz_trackid` rows (e.g. an over-tagged DB):
2060        // the first value is rebuilt into the POPM/UFID frame, later ones dropped.
2061        // Pins the `popm_rating.is_none()` / `mbid.is_none()` guards.
2062        let tags = vec![
2063            TagInput::new("rating", "10"),
2064            TagInput::new("rating", "20"),
2065            TagInput::new("musicbrainz_trackid", "mbid-first"),
2066            TagInput::new("musicbrainz_trackid", "mbid-second"),
2067        ];
2068        let (segments, _len) = build_id3v2_segments(&tags, &[], &[]).unwrap();
2069        let inline: Vec<u8> = segments
2070            .iter()
2071            .flat_map(|s| match s {
2072                Segment::Inline(b) => b.clone(),
2073                _ => Vec::new(),
2074            })
2075            .collect();
2076
2077        // First MusicBrainz id wins, later one dropped.
2078        assert!(find_sub(&inline, b"mbid-first"), "first mbid must win");
2079        assert!(
2080            !find_sub(&inline, b"mbid-second"),
2081            "later mbid must be dropped"
2082        );
2083
2084        // First rating wins: re-parse the synthesized tag and read the promoted value.
2085        let (_opaque, promoted) = super::read_binary_tags(&inline);
2086        assert!(
2087            promoted.contains(&("rating".to_string(), "10".to_string())),
2088            "first rating must win: {promoted:?}"
2089        );
2090        assert!(
2091            !promoted.iter().any(|(k, v)| k == "rating" && v == "20"),
2092            "later rating must be dropped: {promoted:?}"
2093        );
2094    }
2095
2096    #[test]
2097    fn build_id3v2_segments_checked_art_len_rejects_overflow() {
2098        // A hostile art data_len near u64::MAX must fail closed with TooLarge at
2099        // the checked add, not panic (debug) / wrap (release).
2100        let mk = |data_len: u64| ArtInput {
2101            art_id: 1,
2102            mime: "image/png".to_string(),
2103            description: String::new(),
2104            picture_type: PictureType::new(3).unwrap(),
2105            width: 0,
2106            height: 0,
2107            data_len: BlobLen::new(data_len).unwrap(),
2108        };
2109        assert_eq!(
2110            build_id3v2_segments(&[], &[], &[mk(u64::MAX)]).err(),
2111            Some(FormatError::TooLarge)
2112        );
2113    }
2114
2115    fn find_sub(hay: &[u8], needle: &[u8]) -> bool {
2116        hay.windows(needle.len()).any(|w| w == needle)
2117    }
2118
2119    /// On a whole buffer with the production tail (`Some(last 128 bytes)` when
2120    /// the file is at least 128 bytes), `locate_audio_bounded` must agree with
2121    /// `locate_audio`: same accept/reject, same `Mp3Bounds`. This pins the
2122    /// equivalence the #212 fuzz oracle relies on.
2123    fn assert_mp3_bounded_matches_full(data: &[u8]) {
2124        let len = data.len() as u64;
2125        let tail: Option<&[u8; 128]> = if data.len() >= 128 {
2126            data[data.len() - 128..].try_into().ok()
2127        } else {
2128            None
2129        };
2130        match (locate_audio(data), locate_audio_bounded(data, len, tail)) {
2131            (Ok(full), Ok(Extent::Complete(bounded))) => assert_eq!(full, bounded),
2132            (Err(_), Err(_)) => {}
2133            (full, bounded) => {
2134                panic!("mp3 bounded/full divergence: full={full:?} bounded={bounded:?}")
2135            }
2136        }
2137    }
2138
2139    #[test]
2140    fn locate_audio_rejects_high_bit_size_byte() {
2141        // Malformed synchsafe size (last byte 0x80) that masks to body=0, with a valid
2142        // frame sync at offset 10. Must reject rather than serve audio from offset 10.
2143        let mut data = Vec::new();
2144        data.extend_from_slice(b"ID3");
2145        data.extend_from_slice(&[0x04, 0x00, 0x00]); // major, rev, flags
2146        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x80]); // high bit set -> malformed
2147        data.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00]); // valid sync at offset 10
2148        assert_eq!(locate_audio(&data), Err(FormatError::Malformed));
2149    }
2150
2151    #[test]
2152    fn locate_audio_rejects_unsupported_major_version() {
2153        let mut data = Vec::new();
2154        data.extend_from_slice(b"ID3");
2155        data.extend_from_slice(&[0x05, 0x00, 0x00]); // major 5 (unsupported)
2156        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]);
2157        data.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00]);
2158        assert_eq!(locate_audio(&data), Err(FormatError::Malformed));
2159    }
2160
2161    #[test]
2162    fn locate_audio_bounded_rejects_high_bit_size_byte() {
2163        let mut data = Vec::new();
2164        data.extend_from_slice(b"ID3");
2165        data.extend_from_slice(&[0x04, 0x00, 0x00]);
2166        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x80]);
2167        data.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00]);
2168        let file_len = data.len() as u64;
2169        assert_eq!(
2170            locate_audio_bounded(&data, file_len, None),
2171            Err(FormatError::Malformed)
2172        );
2173    }
2174
2175    #[test]
2176    fn mp3_bounded_matches_full_on_whole_buffer() {
2177        // Plain ID3v2.4 + frame sync (no trailer, < 128 bytes -> tail None).
2178        assert_mp3_bounded_matches_full(&crate::fuzz_check::fixtures::mp3());
2179        // Carries a GEOB frame; longer file.
2180        assert_mp3_bounded_matches_full(&crate::fuzz_check::fixtures::mp3_with_binary_frame());
2181
2182        // A >=128-byte MP3 with a trailing ID3v1 "TAG" block, so the tail-strip
2183        // path is exercised and the tail argument is Some.
2184        let mut with_trailer = crate::fuzz_check::fixtures::mp3();
2185        with_trailer.resize(200, 0x00);
2186        with_trailer.extend_from_slice(b"TAG");
2187        with_trailer.resize(with_trailer.len() + 125, 0x00); // pad ID3v1 to 128 bytes
2188        assert_mp3_bounded_matches_full(&with_trailer);
2189    }
2190}