Skip to main content

mutagen_rs/
lib.rs

1pub mod common;
2pub mod id3;
3pub mod mp3;
4pub mod flac;
5pub mod ogg;
6pub mod mp4;
7pub mod vorbis;
8
9#[cfg(feature = "python")]
10use std::sync::{Arc, RwLock, OnceLock};
11#[cfg(feature = "python")]
12use std::collections::HashMap;
13
#[cfg(feature = "python")]
static FILE_CACHE: OnceLock<RwLock<HashMap<String, Arc<[u8]>>>> = OnceLock::new();

/// Returns the process-wide file-content cache, creating it on first use.
///
/// Entries map a path string to the bytes read for it; the bytes sit behind
/// an `Arc` so handing out a cached file is a reference-count bump.
#[cfg(feature = "python")]
fn get_file_cache() -> &'static RwLock<HashMap<String, Arc<[u8]>>> {
    FILE_CACHE.get_or_init(|| {
        let entries = HashMap::with_capacity(256);
        RwLock::new(entries)
    })
}
21
22
/// Returns the contents of `path`, served from the in-process cache when possible.
///
/// On a miss the file is read *without* holding any lock and is then
/// published to the cache. If another thread published the same path in the
/// meantime, that earlier entry wins and is returned, so every caller shares
/// a single allocation per path.
///
/// A poisoned lock is recovered instead of panicking: the map only ever holds
/// fully-constructed `Arc<[u8]>` values, so a panic in some other thread must
/// not make every later file read panic as well.
///
/// # Errors
///
/// Returns the I/O error produced while reading the file on a cache miss.
#[cfg(feature = "python")]
#[inline]
fn read_cached(path: &str) -> std::io::Result<Arc<[u8]>> {
    let cache = get_file_cache();
    {
        let guard = cache
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        if let Some(hit) = guard.get(path) {
            return Ok(Arc::clone(hit));
        }
    }

    // Slow path: do the actual I/O outside the lock so readers are not blocked.
    let data: Arc<[u8]> = fast_file_read(path)?.into();

    let mut guard = cache
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // `or_insert` keeps an entry that raced in ahead of us and drops `data`.
    let shared = guard.entry(path.to_string()).or_insert(data);
    Ok(Arc::clone(shared))
}
43
/// Reads the entire file at `path` into memory.
///
/// On Unix targets this delegates to `fast_file_read_unix`; on every other
/// platform it falls back to `std::fs::read`.
#[cfg(feature = "python")]
#[inline]
fn fast_file_read(path: &str) -> std::io::Result<Vec<u8>> {
    #[cfg(unix)]
    let contents = fast_file_read_unix(path);
    #[cfg(not(unix))]
    let contents = std::fs::read(path);
    contents
}
58
/// Reads the whole file at `path`, tuned for small files on Unix.
///
/// The file is opened with a raw `open(2)` call (with `O_NOATIME` on Linux,
/// retried without it if that open fails) and the first read goes into a
/// reusable per-thread 256 KiB buffer. When that read returns fewer bytes
/// than the buffer holds, the data is treated as the complete file and
/// copied out without any further syscall; otherwise the remainder is
/// appended with `Read::read_to_end`, which loops until end-of-file.
///
/// Paths of 256 bytes or more, and paths containing an interior NUL byte,
/// are handed to `std::fs::read` instead: the former do not fit the stack
/// buffer, and the latter would otherwise be silently truncated at the NUL
/// and open a different file.
///
/// # Errors
///
/// Returns the OS error from opening or reading the file.
#[cfg(all(feature = "python", unix))]
fn fast_file_read_unix(path: &str) -> std::io::Result<Vec<u8>> {
    use std::io::{self, Read};
    use std::os::unix::io::FromRawFd;

    const MAX_INLINE_PATH: usize = 256;
    // 256KB covers most audio metadata files
    const FAST_BUF_SIZE: usize = 256 * 1024;

    let path_bytes = path.as_bytes();
    if path_bytes.len() >= MAX_INLINE_PATH || path_bytes.contains(&0) {
        return std::fs::read(path);
    }

    // NUL-terminated copy of the path on the stack (the buffer is
    // zero-initialised, so the byte after the path is already the terminator).
    let mut c_buf = [0u8; MAX_INLINE_PATH];
    c_buf[..path_bytes.len()].copy_from_slice(path_bytes);
    let c_path = c_buf.as_ptr() as *const libc::c_char;

    // O_NOATIME may fail with EPERM if we don't own the file — retry without it.
    #[cfg(target_os = "linux")]
    let fd = {
        // SAFETY: `c_path` points into `c_buf`, which is NUL-terminated and
        // outlives both calls.
        let with_noatime = unsafe {
            libc::open(c_path, libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOATIME)
        };
        if with_noatime >= 0 {
            with_noatime
        } else {
            unsafe { libc::open(c_path, libc::O_RDONLY | libc::O_CLOEXEC) }
        }
    };
    // SAFETY: `c_path` points into `c_buf`, which is NUL-terminated and
    // outlives the call.
    #[cfg(not(target_os = "linux"))]
    let fd = unsafe { libc::open(c_path, libc::O_RDONLY | libc::O_CLOEXEC) };

    if fd < 0 {
        // Read errno immediately, before any other call can overwrite it.
        return Err(io::Error::last_os_error());
    }

    // SAFETY: `fd` is a valid descriptor we just opened and exclusively own.
    // Wrapping it in `File` closes it on every exit path, including `?`.
    let mut file = unsafe { std::fs::File::from_raw_fd(fd) };

    // Thread-local scratch buffer avoids a 256 KiB allocation per call.
    thread_local! {
        static FAST_BUF: std::cell::RefCell<Vec<u8>> =
            std::cell::RefCell::new(vec![0u8; FAST_BUF_SIZE]);
    }

    FAST_BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        let n = loop {
            match file.read(&mut buf[..]) {
                Ok(n) => break n,
                // A signal interrupted the read before any data arrived: retry.
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        };

        if n < FAST_BUF_SIZE {
            // Fast path: a short first read is taken to mean the whole file
            // fit in the buffer, so no size query or second read is issued.
            return Ok(buf[..n].to_vec());
        }

        // Large file: keep what we have and read until EOF. `read_to_end`
        // handles short reads and reports errors instead of truncating.
        let mut out = Vec::with_capacity(n);
        out.extend_from_slice(&buf[..n]);
        file.read_to_end(&mut out)?;
        Ok(out)
    })
}
144
145#[cfg(feature = "python")]
146mod python_bindings {
147use super::*;
148use pyo3::prelude::*;
149use pyo3::types::{PyDict, PyList, PyBytes, PyTuple};
150use pyo3::exceptions::{PyValueError, PyKeyError, PyIOError};
151
152// ---- Python Classes ----
153
/// POPM (Popularimeter) frame — matches mutagen's POPM repr.
// Exposed to Python as the class `POPM`. Every field has a generated
// read-only getter (`#[pyo3(get)]`); no setters are generated.
#[pyclass(name = "POPM", skip_from_py_object)]
#[derive(Debug, Clone)]
struct PyPOPM {
    // Email string stored in the frame.
    #[pyo3(get)]
    email: String,
    // Rating byte (0..=255 by type).
    #[pyo3(get)]
    rating: u8,
    // Counter value — presumably the play count; confirm against the frame parser.
    #[pyo3(get)]
    count: u64,
}
165
166#[pymethods]
167impl PyPOPM {
168    #[new]
169    fn new(email: String, rating: u8, count: u64) -> Self {
170        PyPOPM { email, rating, count }
171    }
172    fn __repr__(&self) -> String {
173        format!("POPM(email='{}', rating={}, count={})", self.email, self.rating, self.count)
174    }
175    fn __str__(&self) -> String {
176        self.__repr__()
177    }
178}
179
// Audio stream properties of an MPEG file, exposed to Python as `MPEGInfo`.
// Every field has a generated read-only getter (`#[pyo3(get)]`).
#[pyclass(name = "MPEGInfo", from_py_object)]
#[derive(Debug, Clone)]
struct PyMPEGInfo {
    // Duration; `pprint` labels it as seconds.
    #[pyo3(get)]
    length: f64,
    // Number of audio channels.
    #[pyo3(get)]
    channels: u32,
    // Bitrate; `pprint` labels it as bps.
    #[pyo3(get)]
    bitrate: u32,
    // Sample rate; `pprint` labels it as Hz.
    #[pyo3(get)]
    sample_rate: u32,
    // MPEG version number as printed by `pprint` ("MPEG {version} layer {layer}").
    #[pyo3(get)]
    version: f64,
    // MPEG layer number.
    #[pyo3(get)]
    layer: u8,
    // Numeric mode code — presumably the channel mode; confirm against the mp3 parser.
    #[pyo3(get)]
    mode: u32,
    // Protection flag — presumably the header CRC-protection bit; confirm against the mp3 parser.
    #[pyo3(get)]
    protected: bool,
    // Numeric bitrate-mode code; the meaning of each value is defined by the mp3 module.
    #[pyo3(get)]
    bitrate_mode: u8,
    // Encoder identification text.
    #[pyo3(get)]
    encoder_info: String,
    // Encoder settings text.
    #[pyo3(get)]
    encoder_settings: String,
    // Optional gain/peak values — presumably ReplayGain data; `None` when absent.
    #[pyo3(get)]
    track_gain: Option<f32>,
    #[pyo3(get)]
    track_peak: Option<f32>,
    #[pyo3(get)]
    album_gain: Option<f32>,
}
212
213#[pymethods]
214impl PyMPEGInfo {
215    fn __repr__(&self) -> String {
216        format!(
217            "MPEGInfo(length={:.2}, bitrate={}, sample_rate={}, channels={}, version={}, layer={})",
218            self.length, self.bitrate, self.sample_rate, self.channels, self.version, self.layer
219        )
220    }
221
222    fn pprint(&self) -> String {
223        format!(
224            "MPEG {} layer {} {:.2} seconds, {} bps, {} Hz",
225            self.version, self.layer, self.length, self.bitrate, self.sample_rate
226        )
227    }
228}
229
/// ID3 tag container.
// Exposed to Python as the class `ID3`.
#[pyclass(name = "ID3")]
#[derive(Debug)]
struct PyID3 {
    // Parsed tag storage; frames are looked up, replaced and removed through it.
    tags: id3::tags::ID3Tags,
    // Path the tags were loaded from; `None` for a tag set created without a file.
    // Used as the default target by `save` and `delete`.
    path: Option<String>,
    // Tag version pair taken from the ID3 header, defaulting to (4, 0) when
    // no header is available. The first element is what `save` passes on
    // (raised to at least 3) — presumably the ID3v2 major version.
    version: (u8, u8),
}
238
239#[pymethods]
240impl PyID3 {
241    #[new]
242    #[pyo3(signature = (filename=None))]
243    fn new(filename: Option<&str>) -> PyResult<Self> {
244        match filename {
245            Some(path) => {
246                let (tags, header) = id3::load_id3(path)?;
247                let version = header.as_ref().map(|h| h.version).unwrap_or((4, 0));
248                Ok(PyID3 {
249                    tags,
250                    path: Some(path.to_string()),
251                    version,
252                })
253            }
254            None => Ok(PyID3 {
255                tags: id3::tags::ID3Tags::new(),
256                path: None,
257                version: (4, 0),
258            }),
259        }
260    }
261
262    fn getall(&self, key: &str) -> PyResult<Vec<Py<PyAny>>> {
263        Python::attach(|py| {
264            let frames = self.tags.getall(key);
265            Ok(frames.iter().map(|f| frame_to_py(py, f)).collect())
266        })
267    }
268
269    fn keys(&self) -> Vec<String> {
270        self.tags.keys()
271    }
272
273    fn values(&self, py: Python) -> Vec<Py<PyAny>> {
274        self.tags.values().iter().map(|f| frame_to_py(py, f)).collect()
275    }
276
277    fn __getitem__(&mut self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
278        match self.tags.get_mut(key) {
279            Some(frame) => Ok(frame_to_py(py, frame)),
280            None => Err(PyKeyError::new_err(key.to_string())),
281        }
282    }
283
284    fn __setitem__(&mut self, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
285        let text = value.extract::<Vec<String>>().or_else(|_| {
286            value.extract::<String>().map(|s| vec![s])
287        })?;
288
289        let frame = id3::frames::Frame::Text(id3::frames::TextFrame {
290            id: key.to_string(),
291            encoding: id3::specs::Encoding::Utf8,
292            text,
293        });
294
295        let hash_key = frame.hash_key();
296        // Replace existing or push new (Vec-based tag storage)
297        if let Some((_, frames)) = self.tags.frames.iter_mut().find(|(k, _)| k == &hash_key) {
298            *frames = vec![id3::tags::LazyFrame::Decoded(frame)];
299        } else {
300            self.tags.frames.push((hash_key, vec![id3::tags::LazyFrame::Decoded(frame)]));
301        }
302        Ok(())
303    }
304
305    fn __delitem__(&mut self, key: &str) -> PyResult<()> {
306        self.tags.delall(key);
307        Ok(())
308    }
309
310    fn __contains__(&self, key: &str) -> bool {
311        self.tags.get(key).is_some()
312    }
313
314    fn __len__(&self) -> usize {
315        self.tags.len()
316    }
317
318    fn __repr__(&self) -> String {
319        format!("ID3(keys={})", self.tags.keys().join(", "))
320    }
321
322    fn __iter__(&self, py: Python) -> PyResult<Py<PyAny>> {
323        let keys = self.tags.keys();
324        let list = PyList::new(py, &keys)?;
325        Ok(list.call_method0("__iter__")?.into())
326    }
327
328    fn save(&self, filename: Option<&str>) -> PyResult<()> {
329        let path = filename
330            .map(|s| s.to_string())
331            .or_else(|| self.path.clone())
332            .ok_or_else(|| PyValueError::new_err("No filename specified"))?;
333
334        id3::save_id3(&path, &self.tags, self.version.0.max(3))?;
335        invalidate_file(&path);
336        Ok(())
337    }
338
339    fn delete(&self, filename: Option<&str>) -> PyResult<()> {
340        let path = filename
341            .map(|s| s.to_string())
342            .or_else(|| self.path.clone())
343            .ok_or_else(|| PyValueError::new_err("No filename specified"))?;
344
345        id3::delete_id3(&path)?;
346        invalidate_file(&path);
347        Ok(())
348    }
349
350    fn pprint(&self) -> String {
351        let mut parts = Vec::new();
352        for frame in self.tags.values() {
353            parts.push(format!("{}={}", frame.frame_id(), frame.pprint()));
354        }
355        parts.join("\n")
356    }
357
358    #[getter]
359    fn version(&self) -> (u8, u8) {
360        self.version
361    }
362}
363
/// MP3 file (ID3 tags + audio info).
// Exposed to Python as the class `MP3`.
#[pyclass(name = "MP3")]
struct PyMP3 {
    // Stream properties; readable from Python.
    #[pyo3(get)]
    info: PyMPEGInfo,
    // Path the file was opened with; readable from Python.
    #[pyo3(get)]
    filename: String,
    // Python dict of tag key -> converted frame, built once at construction
    // and used to serve `__getitem__` / `__contains__`.
    tag_dict: Py<PyDict>,
    // Keys present in `tag_dict`, in insertion order; returned by `keys()`.
    tag_keys: Vec<String>,
    // Underlying ID3 storage used for saving, deleting and pretty-printing.
    id3: PyID3,
}
375
376impl PyMP3 {
377    #[inline(always)]
378    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
379        let mut mp3_file = mp3::MP3File::parse(data, filename)?;
380        mp3_file.ensure_tags_parsed(data);
381        let info = make_mpeg_info(&mp3_file.info);
382        let version = mp3_file.id3_header.as_ref().map(|h| h.version).unwrap_or((4, 0));
383
384        // Pre-build Python dict of all tags during construction
385        let tag_dict = PyDict::new(py);
386        let mut tag_keys = Vec::with_capacity(mp3_file.tags.frames.len());
387        for (hash_key, frames) in mp3_file.tags.frames.iter_mut() {
388            if let Some(lf) = frames.first_mut() {
389                if let Ok(frame) = lf.decode_with_buf(&mp3_file.tags.raw_buf) {
390                    let key_str = hash_key.as_str();
391                    let _ = tag_dict.set_item(key_str, frame_to_py(py, frame));
392                    tag_keys.push(key_str.to_string());
393                }
394            }
395        }
396
397        Ok(PyMP3 {
398            info,
399            filename: filename.to_string(),
400            tag_dict: tag_dict.into(),
401            tag_keys,
402            id3: PyID3 {
403                tags: mp3_file.tags,
404                path: Some(filename.to_string()),
405                version,
406            },
407        })
408    }
409}
410
411#[pymethods]
412impl PyMP3 {
413    #[new]
414    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
415        let data = read_cached(filename)
416            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
417        Self::from_data(py, &data, filename)
418    }
419
420    #[getter]
421    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
422        let id3 = PyID3 {
423            tags: self.id3.tags.clone(),
424            path: self.id3.path.clone(),
425            version: self.id3.version,
426        };
427        Ok(id3.into_pyobject(py)?.into_any().unbind())
428    }
429
430    fn keys(&self) -> Vec<String> {
431        self.tag_keys.clone()
432    }
433
434    #[inline(always)]
435    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
436        let dict = self.tag_dict.bind(py);
437        match dict.get_item(key)? {
438            Some(val) => Ok(val.unbind()),
439            None => Err(PyKeyError::new_err(key.to_string())),
440        }
441    }
442
443    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
444        let text = value.extract::<Vec<String>>().or_else(|_| {
445            value.extract::<String>().map(|s| vec![s])
446        })?;
447        // Update the cached Python dict + key list
448        let _ = self.tag_dict.bind(py).set_item(key, PyList::new(py, &text)?);
449        if !self.tag_keys.contains(&key.to_string()) {
450            self.tag_keys.push(key.to_string());
451        }
452        // Update the underlying ID3 tag storage
453        let frame = id3::frames::Frame::Text(id3::frames::TextFrame {
454            id: key.to_string(),
455            encoding: id3::specs::Encoding::Utf8,
456            text,
457        });
458        let hash_key = frame.hash_key();
459        if let Some((_, frames)) = self.id3.tags.frames.iter_mut().find(|(k, _)| k == &hash_key) {
460            *frames = vec![id3::tags::LazyFrame::Decoded(frame)];
461        } else {
462            self.id3.tags.frames.push((hash_key, vec![id3::tags::LazyFrame::Decoded(frame)]));
463        }
464        Ok(())
465    }
466
467    fn __contains__(&self, py: Python, key: &str) -> bool {
468        self.tag_dict.bind(py).get_item(key).ok().flatten().is_some()
469    }
470
471    fn __repr__(&self) -> String {
472        format!("MP3(filename={:?})", self.filename)
473    }
474
475    fn save(&self) -> PyResult<()> {
476        self.id3.save(Some(&self.filename))
477    }
478
479    fn delete(&self) -> PyResult<()> {
480        self.id3.delete(Some(&self.filename))
481    }
482
483    fn add_tags(&self) -> PyResult<()> {
484        // MP3 always has ID3 tags after construction
485        Ok(())
486    }
487
488    fn clear(&mut self, py: Python) -> PyResult<()> {
489        self.id3.tags.frames.clear();
490        self.tag_keys.clear();
491        let dict = self.tag_dict.bind(py);
492        dict.clear();
493        Ok(())
494    }
495
496    fn pprint(&self) -> String {
497        format!("{}\n{}", self.info.pprint(), self.id3.pprint())
498    }
499}
500
/// FLAC stream info.
// Exposed to Python as `StreamInfo`; every field has a generated read-only
// getter (`#[pyo3(get)]`).
#[pyclass(name = "StreamInfo", from_py_object)]
#[derive(Debug, Clone)]
struct PyStreamInfo {
    // Duration; `pprint` labels it as seconds.
    #[pyo3(get)]
    length: f64,
    // Number of audio channels.
    #[pyo3(get)]
    channels: u8,
    // Sample rate; `pprint` labels it as Hz.
    #[pyo3(get)]
    sample_rate: u32,
    // Bits per sample.
    #[pyo3(get)]
    bits_per_sample: u8,
    // Total number of samples in the stream.
    #[pyo3(get)]
    total_samples: u64,
    // Minimum / maximum block size copied from the parsed stream info.
    #[pyo3(get)]
    min_block_size: u16,
    #[pyo3(get)]
    max_block_size: u16,
    // Minimum / maximum frame size copied from the parsed stream info.
    #[pyo3(get)]
    min_frame_size: u32,
    #[pyo3(get)]
    max_frame_size: u32,
    // Bitrate; filled in by whoever constructs the struct.
    #[pyo3(get)]
    bitrate: u32,
}
526
527#[pymethods]
528impl PyStreamInfo {
529    fn __repr__(&self) -> String {
530        format!(
531            "StreamInfo(length={:.2}, sample_rate={}, channels={}, bits_per_sample={})",
532            self.length, self.sample_rate, self.channels, self.bits_per_sample
533        )
534    }
535
536    fn pprint(&self) -> String {
537        format!(
538            "FLAC, {:.2} seconds, {} Hz",
539            self.length, self.sample_rate
540        )
541    }
542}
543
/// VorbisComment-based tags (used by FLAC and OGG).
// Exposed to Python as the class `VComment`.
#[pyclass(name = "VComment", from_py_object)]
#[derive(Debug, Clone)]
struct PyVComment {
    // The wrapped comment block; all tag operations delegate to it.
    vc: vorbis::VorbisComment,
    // Path of the file the comments belong to, when known. Stored but not
    // read by any method visible here, hence the `dead_code` allowance.
    #[allow(dead_code)]
    path: Option<String>,
}
552
553#[pymethods]
554impl PyVComment {
555    fn keys(&self) -> Vec<String> {
556        self.vc.keys()
557    }
558
559    #[inline(always)]
560    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
561        let values = self.vc.get(key);
562        if values.is_empty() {
563            return Err(PyKeyError::new_err(key.to_string()));
564        }
565        Ok(PyList::new(py, values)?.into_any().unbind())
566    }
567
568    fn __setitem__(&mut self, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
569        let values = value.extract::<Vec<String>>().or_else(|_| {
570            value.extract::<String>().map(|s| vec![s])
571        })?;
572        self.vc.set(key, values);
573        Ok(())
574    }
575
576    fn __delitem__(&mut self, key: &str) -> PyResult<()> {
577        self.vc.delete(key);
578        Ok(())
579    }
580
581    fn __contains__(&self, key: &str) -> bool {
582        !self.vc.get(key).is_empty()
583    }
584
585    fn __len__(&self) -> usize {
586        self.vc.keys().len()
587    }
588
589    fn __iter__(&self, py: Python) -> PyResult<Py<PyAny>> {
590        let keys = self.vc.keys();
591        let list = PyList::new(py, &keys)?;
592        Ok(list.call_method0("__iter__")?.into())
593    }
594
595    fn __repr__(&self) -> String {
596        format!("VComment(keys={})", self.vc.keys().join(", "))
597    }
598
599    #[getter]
600    fn vendor(&self) -> &str {
601        &self.vc.vendor
602    }
603}
604
/// FLAC file.
// Exposed to Python as the class `FLAC`.
#[pyclass(name = "FLAC")]
struct PyFLAC {
    // Stream properties; readable from Python.
    #[pyo3(get)]
    info: PyStreamInfo,
    // Path the file was opened with; readable from Python.
    #[pyo3(get)]
    filename: String,
    // Parsed file state; this is what `save` writes out.
    flac_file: flac::FLACFile,
    // Copy of the Vorbis comments taken at construction and kept in step by
    // `__setitem__` / `clear`; the `tags` getter hands out clones of it.
    vc_data: vorbis::VorbisComment,
    // Python dict of tag key -> list of values, used to serve lookups.
    tag_dict: Py<PyDict>,
    // Tag keys returned by `keys()`.
    tag_keys: Vec<String>,
}
617
618impl PyFLAC {
619    #[inline(always)]
620    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
621        let mut flac_file = flac::FLACFile::parse(data, filename)?;
622
623        // Compute bitrate from audio data size (exclude metadata), matching mutagen
624        let audio_data_size = data.len().saturating_sub(flac_file.flac_offset + flac_file.metadata_length);
625        let bitrate = if flac_file.info.length > 0.0 {
626            (audio_data_size as f64 * 8.0 / flac_file.info.length) as u32
627        } else { 0 };
628        let info = PyStreamInfo {
629            length: flac_file.info.length,
630            channels: flac_file.info.channels,
631            sample_rate: flac_file.info.sample_rate,
632            bits_per_sample: flac_file.info.bits_per_sample,
633            total_samples: flac_file.info.total_samples,
634            min_block_size: flac_file.info.min_block_size,
635            max_block_size: flac_file.info.max_block_size,
636            min_frame_size: flac_file.info.min_frame_size,
637            max_frame_size: flac_file.info.max_frame_size,
638            bitrate,
639        };
640
641        flac_file.ensure_tags();
642        let vc_data = flac_file.tags.clone().unwrap_or_else(|| vorbis::VorbisComment::new());
643
644        // Pre-build Python dict of all tags
645        let tag_dict = PyDict::new(py);
646        let tag_keys = vc_data.keys();
647        for key in &tag_keys {
648            let values = vc_data.get(key);
649            if !values.is_empty() {
650                let _ = tag_dict.set_item(key.as_str(), PyList::new(py, values)?);
651            }
652        }
653
654        Ok(PyFLAC {
655            info,
656            filename: filename.to_string(),
657            flac_file,
658            vc_data,
659            tag_dict: tag_dict.into(),
660            tag_keys,
661        })
662    }
663}
664
665#[pymethods]
666impl PyFLAC {
667    #[new]
668    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
669        let data = read_cached(filename)
670            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
671        Self::from_data(py, &data, filename)
672    }
673
674    #[getter]
675    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
676        let vc = self.vc_data.clone();
677        let pvc = PyVComment { vc, path: Some(self.filename.clone()) };
678        Ok(pvc.into_pyobject(py)?.into_any().unbind())
679    }
680
681    fn keys(&self) -> Vec<String> {
682        self.tag_keys.clone()
683    }
684
685    #[inline(always)]
686    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
687        let dict = self.tag_dict.bind(py);
688        match dict.get_item(key)? {
689            Some(val) => Ok(val.unbind()),
690            None => Err(PyKeyError::new_err(key.to_string())),
691        }
692    }
693
694    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
695        let values = value.extract::<Vec<String>>().or_else(|_| {
696            value.extract::<String>().map(|s| vec![s])
697        })?;
698        // Update the cached Python dict + key list
699        let _ = self.tag_dict.bind(py).set_item(key, PyList::new(py, &values)?);
700        if !self.tag_keys.contains(&key.to_string()) {
701            self.tag_keys.push(key.to_string());
702        }
703        // Update the underlying Vorbis comment storage
704        self.vc_data.set(key, values.clone());
705        if let Some(ref mut tags) = self.flac_file.tags {
706            tags.set(key, values);
707        }
708        Ok(())
709    }
710
711    fn __contains__(&self, py: Python, key: &str) -> bool {
712        self.tag_dict.bind(py).get_item(key).ok().flatten().is_some()
713    }
714
715    fn __repr__(&self) -> String {
716        format!("FLAC(filename={:?})", self.filename)
717    }
718
719    fn save(&self) -> PyResult<()> {
720        self.flac_file.save()?;
721        invalidate_file(&self.filename);
722        Ok(())
723    }
724
725    #[getter]
726    fn pictures(&self, py: Python) -> PyResult<Py<PyList>> {
727        let mut pics = Vec::new();
728        // Resolve lazy pictures from the file data
729        for lp in &self.flac_file.lazy_pictures {
730            let data = std::fs::read(&self.filename)
731                .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
732            if lp.block_offset + lp.block_size <= data.len() {
733                if let Ok(pic) = flac::FLACPicture::parse(&data[lp.block_offset..lp.block_offset + lp.block_size]) {
734                    let dict = PyDict::new(py);
735                    let _ = dict.set_item("type", pic.pic_type);
736                    let _ = dict.set_item("mime", &pic.mime);
737                    let _ = dict.set_item("desc", &pic.desc);
738                    let _ = dict.set_item("width", pic.width);
739                    let _ = dict.set_item("height", pic.height);
740                    let _ = dict.set_item("depth", pic.depth);
741                    let _ = dict.set_item("colors", pic.colors);
742                    let _ = dict.set_item("data", pyo3::types::PyBytes::new(py, &pic.data));
743                    pics.push(dict.into_any().unbind());
744                }
745            }
746        }
747        // Also include already-parsed pictures
748        for pic in &self.flac_file.pictures {
749            let dict = PyDict::new(py);
750            let _ = dict.set_item("type", pic.pic_type);
751            let _ = dict.set_item("mime", &pic.mime);
752            let _ = dict.set_item("desc", &pic.desc);
753            let _ = dict.set_item("width", pic.width);
754            let _ = dict.set_item("height", pic.height);
755            let _ = dict.set_item("depth", pic.depth);
756            let _ = dict.set_item("colors", pic.colors);
757            let _ = dict.set_item("data", pyo3::types::PyBytes::new(py, &pic.data));
758            pics.push(dict.into_any().unbind());
759        }
760        Ok(PyList::new(py, pics)?.unbind())
761    }
762
763    fn delete(&self) -> PyResult<()> {
764        // Delete all FLAC tags by clearing VC and pictures, then saving
765        let mut flac_file = flac::FLACFile::open(&self.filename)
766            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
767        flac_file.tags = Some(vorbis::VorbisComment::new());
768        flac_file.pictures.clear();
769        flac_file.lazy_pictures.clear();
770        flac_file.save()
771            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
772        invalidate_file(&self.filename);
773        Ok(())
774    }
775
776    fn add_tags(&mut self) -> PyResult<()> {
777        // Ensure tags exist (FLAC always has a VC block)
778        self.flac_file.ensure_tags();
779        Ok(())
780    }
781
782    fn clear(&mut self, py: Python) -> PyResult<()> {
783        self.vc_data = vorbis::VorbisComment::new();
784        self.tag_keys.clear();
785        let dict = self.tag_dict.bind(py);
786        dict.clear();
787        if let Some(ref mut tags) = self.flac_file.tags {
788            *tags = vorbis::VorbisComment::new();
789        }
790        Ok(())
791    }
792}
793
/// OGG Vorbis info.
// Exposed to Python as `OggVorbisInfo`; every field has a generated
// read-only getter (`#[pyo3(get)]`).
#[pyclass(name = "OggVorbisInfo", from_py_object)]
#[derive(Debug, Clone)]
struct PyOggVorbisInfo {
    // Duration; `pprint` labels it as seconds.
    #[pyo3(get)]
    length: f64,
    // Number of audio channels.
    #[pyo3(get)]
    channels: u8,
    // Sample rate; `pprint` labels it as Hz.
    #[pyo3(get)]
    sample_rate: u32,
    // Bitrate as reported by the parser.
    #[pyo3(get)]
    bitrate: u32,
}
807
808#[pymethods]
809impl PyOggVorbisInfo {
810    fn __repr__(&self) -> String {
811        format!(
812            "OggVorbisInfo(length={:.2}, sample_rate={}, channels={})",
813            self.length, self.sample_rate, self.channels
814        )
815    }
816
817    fn pprint(&self) -> String {
818        format!(
819            "Ogg Vorbis, {:.2} seconds, {} Hz",
820            self.length, self.sample_rate
821        )
822    }
823}
824
/// OGG Vorbis file.
// Exposed to Python as the class `OggVorbis`.
#[pyclass(name = "OggVorbis")]
struct PyOggVorbis {
    // Stream properties; readable from Python.
    #[pyo3(get)]
    info: PyOggVorbisInfo,
    // Path the file was opened with; readable from Python.
    #[pyo3(get)]
    filename: String,
    // Current comments; `save` writes these and the `tags` getter clones them.
    vc: PyVComment,
    // Python dict of tag key -> list of values, used to serve lookups.
    tag_dict: Py<PyDict>,
    // Tag keys returned by `keys()`.
    tag_keys: Vec<String>,
}
836
837impl PyOggVorbis {
838    #[inline(always)]
839    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
840        let mut ogg_file = ogg::OggVorbisFile::parse(data, filename)?;
841        ogg_file.ensure_full_parse(data);
842        ogg_file.ensure_tags();
843
844        let info = PyOggVorbisInfo {
845            length: ogg_file.info.length,
846            channels: ogg_file.info.channels,
847            sample_rate: ogg_file.info.sample_rate,
848            bitrate: ogg_file.info.bitrate,
849        };
850
851        // Pre-build Python dict of all tags
852        let tag_dict = PyDict::new(py);
853        let tag_keys = ogg_file.tags.keys();
854        for key in &tag_keys {
855            let values = ogg_file.tags.get(key);
856            if !values.is_empty() {
857                let _ = tag_dict.set_item(key.as_str(), PyList::new(py, values)?);
858            }
859        }
860
861        let vc = PyVComment {
862            vc: ogg_file.tags,
863            path: Some(filename.to_string()),
864        };
865
866        Ok(PyOggVorbis {
867            info,
868            filename: filename.to_string(),
869            vc,
870            tag_dict: tag_dict.into(),
871            tag_keys,
872        })
873    }
874}
875
876#[pymethods]
877impl PyOggVorbis {
878    #[new]
879    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
880        let data = read_cached(filename)
881            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
882        Self::from_data(py, &data, filename)
883    }
884
885    #[getter]
886    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
887        let vc = self.vc.clone();
888        Ok(vc.into_pyobject(py)?.into_any().unbind())
889    }
890
891    fn keys(&self) -> Vec<String> {
892        self.tag_keys.clone()
893    }
894
895    #[inline(always)]
896    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
897        let dict = self.tag_dict.bind(py);
898        match dict.get_item(key)? {
899            Some(val) => Ok(val.unbind()),
900            None => Err(PyKeyError::new_err(key.to_string())),
901        }
902    }
903
904    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
905        let values = value.extract::<Vec<String>>().or_else(|_| {
906            value.extract::<String>().map(|s| vec![s])
907        })?;
908        self.vc.vc.set(key, values.clone());
909        let _ = self.tag_dict.bind(py).set_item(key, PyList::new(py, &values)?);
910        if !self.tag_keys.contains(&key.to_string()) {
911            self.tag_keys.push(key.to_string());
912        }
913        Ok(())
914    }
915
916    fn __contains__(&self, py: Python, key: &str) -> bool {
917        self.tag_dict.bind(py).get_item(key).ok().flatten().is_some()
918    }
919
920    fn __repr__(&self) -> String {
921        format!("OggVorbis(filename={:?})", self.filename)
922    }
923
924    fn save(&self) -> PyResult<()> {
925        let data = read_cached(&self.filename)
926            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
927        let mut ogg_file = ogg::OggVorbisFile::parse(&data, &self.filename)
928            .map_err(|e| PyValueError::new_err(format!("{}", e)))?;
929        ogg_file.tags = self.vc.vc.clone();
930        ogg_file.save()
931            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
932        invalidate_file(&self.filename);
933        Ok(())
934    }
935
936    fn delete(&self) -> PyResult<()> {
937        let data = read_cached(&self.filename)
938            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
939        let mut ogg_file = ogg::OggVorbisFile::parse(&data, &self.filename)
940            .map_err(|e| PyValueError::new_err(format!("{}", e)))?;
941        ogg_file.tags = vorbis::VorbisComment::new();
942        ogg_file.save()
943            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
944        invalidate_file(&self.filename);
945        Ok(())
946    }
947
948    fn add_tags(&self) -> PyResult<()> {
949        Ok(())
950    }
951
952    fn clear(&mut self, py: Python) -> PyResult<()> {
953        self.vc.vc = vorbis::VorbisComment::new();
954        self.tag_keys.clear();
955        let dict = self.tag_dict.bind(py);
956        dict.clear();
957        Ok(())
958    }
959}
960
/// MP4 file info.
// Exposed to Python as `MP4Info`. Every field has a generated getter and no
// setter, so the values are read-only from Python.
#[pyclass(name = "MP4Info", from_py_object)]
#[derive(Debug, Clone)]
struct PyMP4Info {
    // Duration; `pprint` reports this value as seconds.
    #[pyo3(get)]
    length: f64,
    // Channel count.
    #[pyo3(get)]
    channels: u32,
    // Sample rate.
    #[pyo3(get)]
    sample_rate: u32,
    // Bitrate; `pprint` reports this value as bps.
    #[pyo3(get)]
    bitrate: u32,
    // Bits per sample.
    #[pyo3(get)]
    bits_per_sample: u32,
    // Codec identifier, shown by `__repr__` and `pprint`.
    #[pyo3(get)]
    codec: String,
    // Longer codec description string.
    #[pyo3(get)]
    codec_description: String,
}
980
981#[pymethods]
982impl PyMP4Info {
983    fn __repr__(&self) -> String {
984        format!(
985            "MP4Info(length={:.2}, codec={}, channels={}, sample_rate={})",
986            self.length, self.codec, self.channels, self.sample_rate
987        )
988    }
989
990    fn pprint(&self) -> String {
991        format!(
992            "MPEG-4 audio ({}), {:.2} seconds, {} bps",
993            self.codec, self.length, self.bitrate
994        )
995    }
996}
997
/// MP4 tags.
// Exposed to Python as `MP4Tags`; a thin wrapper around the crate's
// `mp4::MP4Tags` container.
#[pyclass(name = "MP4Tags", from_py_object)]
#[derive(Debug, Clone)]
struct PyMP4Tags {
    // Underlying tag storage that the Python-facing methods delegate to.
    tags: mp4::MP4Tags,
}
1004
1005#[pymethods]
1006impl PyMP4Tags {
1007    fn keys(&self) -> Vec<String> {
1008        self.tags.keys()
1009    }
1010
1011    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
1012        match self.tags.get(key) {
1013            Some(value) => mp4_value_to_py(py, value),
1014            None => Err(PyKeyError::new_err(key.to_string())),
1015        }
1016    }
1017
1018    fn __contains__(&self, key: &str) -> bool {
1019        self.tags.contains_key(key)
1020    }
1021
1022    fn __len__(&self) -> usize {
1023        self.tags.items.len()
1024    }
1025
1026    fn __iter__(&self, py: Python) -> PyResult<Py<PyAny>> {
1027        let keys = self.tags.keys();
1028        let list = PyList::new(py, &keys)?;
1029        Ok(list.call_method0("__iter__")?.into())
1030    }
1031
1032    fn __repr__(&self) -> String {
1033        format!("MP4Tags(keys={})", self.tags.keys().join(", "))
1034    }
1035}
1036
/// MP4 file.
// Exposed to Python as `MP4`. Holds the parsed stream info plus three views of
// the tags that the mutating methods keep in step: the Rust tag container, a
// pre-built Python dict, and the list of keys.
#[pyclass(name = "MP4")]
struct PyMP4 {
    // Stream properties, readable from Python.
    #[pyo3(get)]
    info: PyMP4Info,
    // Path the file was loaded from, readable from Python; used by save/delete.
    #[pyo3(get)]
    filename: String,
    // Rust-side tag storage; this is what `save` writes out.
    mp4_tags: PyMP4Tags,
    // Python dict of already-converted tag values, used for item lookups.
    tag_dict: Py<PyDict>,
    // Tag keys returned by `keys()`.
    tag_keys: Vec<String>,
}
1048
1049impl PyMP4 {
1050    #[inline(always)]
1051    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
1052        let mut mp4_file = mp4::MP4File::parse(data, filename)?;
1053        mp4_file.ensure_parsed_with_data(data);
1054
1055        let info = PyMP4Info {
1056            length: mp4_file.info.length,
1057            channels: mp4_file.info.channels,
1058            sample_rate: mp4_file.info.sample_rate,
1059            bitrate: mp4_file.info.bitrate,
1060            bits_per_sample: mp4_file.info.bits_per_sample,
1061            codec: mp4_file.info.codec,
1062            codec_description: mp4_file.info.codec_description,
1063        };
1064
1065        // Pre-build Python dict of all tags
1066        let tag_dict = PyDict::new(py);
1067        let tag_keys = mp4_file.tags.keys();
1068        for key in &tag_keys {
1069            if let Some(value) = mp4_file.tags.get(key) {
1070                if let Ok(py_val) = mp4_value_to_py(py, value) {
1071                    let _ = tag_dict.set_item(key.as_str(), py_val);
1072                }
1073            }
1074        }
1075
1076        let mp4_tags = PyMP4Tags {
1077            tags: mp4_file.tags,
1078        };
1079
1080        Ok(PyMP4 {
1081            info,
1082            filename: filename.to_string(),
1083            mp4_tags,
1084            tag_dict: tag_dict.into(),
1085            tag_keys,
1086        })
1087    }
1088}
1089
1090#[pymethods]
1091impl PyMP4 {
1092    #[new]
1093    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
1094        let data = read_cached(filename)
1095            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
1096        Self::from_data(py, &data, filename)
1097    }
1098
1099    #[getter]
1100    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
1101        let tags = self.mp4_tags.clone();
1102        Ok(tags.into_pyobject(py)?.into_any().unbind())
1103    }
1104
1105    fn keys(&self) -> Vec<String> {
1106        self.tag_keys.clone()
1107    }
1108
1109    #[inline(always)]
1110    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
1111        let dict = self.tag_dict.bind(py);
1112        match dict.get_item(key)? {
1113            Some(val) => Ok(val.unbind()),
1114            None => Err(PyKeyError::new_err(key.to_string())),
1115        }
1116    }
1117
1118    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
1119        let tag_value = py_to_mp4_value(key, value)?;
1120        // Update cached Python dict
1121        let py_val = mp4_value_to_py(py, &tag_value)?;
1122        let _ = self.tag_dict.bind(py).set_item(key, py_val);
1123        if !self.tag_keys.contains(&key.to_string()) {
1124            self.tag_keys.push(key.to_string());
1125        }
1126        // Update underlying tag storage
1127        self.mp4_tags.tags.set(key, tag_value);
1128        Ok(())
1129    }
1130
1131    fn __delitem__(&mut self, py: Python, key: &str) -> PyResult<()> {
1132        let dict = self.tag_dict.bind(py);
1133        if dict.get_item(key)?.is_none() {
1134            return Err(PyKeyError::new_err(key.to_string()));
1135        }
1136        dict.del_item(key)?;
1137        self.tag_keys.retain(|k| k != key);
1138        self.mp4_tags.tags.delete(key);
1139        Ok(())
1140    }
1141
1142    fn __contains__(&self, py: Python, key: &str) -> bool {
1143        self.tag_dict.bind(py).get_item(key).ok().flatten().is_some()
1144    }
1145
1146    fn save(&self) -> PyResult<()> {
1147        mp4::save_mp4_tags(&self.filename, &self.mp4_tags.tags)
1148            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
1149        invalidate_file(&self.filename);
1150        Ok(())
1151    }
1152
1153    fn delete(&self) -> PyResult<()> {
1154        let empty = mp4::MP4Tags::new();
1155        mp4::save_mp4_tags(&self.filename, &empty)
1156            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
1157        invalidate_file(&self.filename);
1158        Ok(())
1159    }
1160
1161    fn __repr__(&self) -> String {
1162        format!("MP4(filename={:?})", self.filename)
1163    }
1164
1165    fn add_tags(&self) -> PyResult<()> {
1166        Ok(())
1167    }
1168
1169    fn clear(&mut self, py: Python) -> PyResult<()> {
1170        self.mp4_tags.tags.items.clear();
1171        self.tag_keys.clear();
1172        let dict = self.tag_dict.bind(py);
1173        dict.clear();
1174        Ok(())
1175    }
1176}
1177
1178// ---- Helper functions ----
1179
1180#[inline(always)]
1181fn make_mpeg_info(info: &mp3::MPEGInfo) -> PyMPEGInfo {
1182    PyMPEGInfo {
1183        length: info.length,
1184        channels: info.channels,
1185        bitrate: info.bitrate,
1186        sample_rate: info.sample_rate,
1187        version: info.version,
1188        layer: info.layer,
1189        mode: info.mode,
1190        protected: info.protected,
1191        bitrate_mode: match info.bitrate_mode {
1192            mp3::xing::BitrateMode::Unknown => 0,
1193            mp3::xing::BitrateMode::CBR => 1,
1194            mp3::xing::BitrateMode::VBR => 2,
1195            mp3::xing::BitrateMode::ABR => 3,
1196        },
1197        encoder_info: info.encoder_info.clone(),
1198        encoder_settings: info.encoder_settings.clone(),
1199        track_gain: info.track_gain,
1200        track_peak: info.track_peak,
1201        album_gain: info.album_gain,
1202    }
1203}
1204
1205#[inline(always)]
1206fn frame_to_py(py: Python, frame: &id3::frames::Frame) -> Py<PyAny> {
1207    match frame {
1208        id3::frames::Frame::Text(f) => {
1209            if f.text.len() == 1 {
1210                f.text[0].as_str().into_pyobject(py).unwrap().into_any().unbind()
1211            } else {
1212                let list = PyList::new(py, &f.text).unwrap();
1213                list.into_any().unbind()
1214            }
1215        }
1216        id3::frames::Frame::UserText(f) => {
1217            if f.text.len() == 1 {
1218                f.text[0].as_str().into_pyobject(py).unwrap().into_any().unbind()
1219            } else {
1220                let list = PyList::new(py, &f.text).unwrap();
1221                list.into_any().unbind()
1222            }
1223        }
1224        id3::frames::Frame::Url(f) => {
1225            f.url.as_str().into_pyobject(py).unwrap().into_any().unbind()
1226        }
1227        id3::frames::Frame::UserUrl(f) => {
1228            f.url.as_str().into_pyobject(py).unwrap().into_any().unbind()
1229        }
1230        id3::frames::Frame::Comment(f) => {
1231            f.text.as_str().into_pyobject(py).unwrap().into_any().unbind()
1232        }
1233        id3::frames::Frame::Lyrics(f) => {
1234            f.text.as_str().into_pyobject(py).unwrap().into_any().unbind()
1235        }
1236        id3::frames::Frame::Picture(f) => {
1237            let dict = PyDict::new(py);
1238            dict.set_item("mime", &f.mime).unwrap();
1239            dict.set_item("type", f.pic_type as u8).unwrap();
1240            dict.set_item("desc", &f.desc).unwrap();
1241            dict.set_item("data", PyBytes::new(py, &f.data)).unwrap();
1242            dict.into_any().unbind()
1243        }
1244        id3::frames::Frame::Popularimeter(f) => {
1245            Py::new(py, PyPOPM {
1246                email: f.email.clone(),
1247                rating: f.rating,
1248                count: f.count,
1249            }).unwrap().into_any()
1250        }
1251        id3::frames::Frame::Binary(f) => {
1252            PyBytes::new(py, &f.data).into_any().unbind()
1253        }
1254        id3::frames::Frame::PairedText(f) => {
1255            let pairs: Vec<(&str, &str)> = f.people.iter().map(|(a, b)| (a.as_str(), b.as_str())).collect();
1256            let list = PyList::new(py, &pairs).unwrap();
1257            list.into_any().unbind()
1258        }
1259    }
1260}
1261
1262#[inline(always)]
1263fn mp4_value_to_py(py: Python, value: &mp4::MP4TagValue) -> PyResult<Py<PyAny>> {
1264    match value {
1265        mp4::MP4TagValue::Text(v) => {
1266            if v.len() == 1 {
1267                Ok(v[0].as_str().into_pyobject(py)?.into_any().unbind())
1268            } else {
1269                Ok(PyList::new(py, v)?.into_any().unbind())
1270            }
1271        }
1272        mp4::MP4TagValue::Integer(v) => {
1273            if v.len() == 1 {
1274                Ok(v[0].into_pyobject(py)?.into_any().unbind())
1275            } else {
1276                Ok(PyList::new(py, v)?.into_any().unbind())
1277            }
1278        }
1279        mp4::MP4TagValue::IntPair(v) => {
1280            let pairs: Vec<_> = v.iter().map(|(a, b)| (*a, *b)).collect();
1281            if pairs.len() == 1 {
1282                Ok(PyTuple::new(py, &[pairs[0].0, pairs[0].1])?.into_any().unbind())
1283            } else {
1284                let list = PyList::empty(py);
1285                for (a, b) in &pairs {
1286                    list.append(PyTuple::new(py, &[*a, *b])?)?;
1287                }
1288                Ok(list.into_any().unbind())
1289            }
1290        }
1291        mp4::MP4TagValue::Bool(v) => {
1292            Ok((*v).into_pyobject(py)?.to_owned().into_any().unbind())
1293        }
1294        mp4::MP4TagValue::Cover(covers) => {
1295            let list = PyList::empty(py);
1296            for cover in covers {
1297                let dict = PyDict::new(py);
1298                dict.set_item("data", PyBytes::new(py, &cover.data))?;
1299                dict.set_item("format", cover.format as u8)?;
1300                list.append(dict)?;
1301            }
1302            Ok(list.into_any().unbind())
1303        }
1304        mp4::MP4TagValue::FreeForm(forms) => {
1305            let list = PyList::empty(py);
1306            for form in forms {
1307                list.append(PyBytes::new(py, &form.data))?;
1308            }
1309            Ok(list.into_any().unbind())
1310        }
1311        mp4::MP4TagValue::Data(d) => {
1312            Ok(PyBytes::new(py, d).into_any().unbind())
1313        }
1314    }
1315}
1316
1317/// Convert a Python value to an MP4TagValue based on the key and value type.
1318fn py_to_mp4_value(key: &str, value: &Bound<'_, PyAny>) -> PyResult<mp4::MP4TagValue> {
1319    // Cover art: list of bytes objects or list of dicts with data/format
1320    if key == "covr" {
1321        if let Ok(list) = value.cast::<PyList>() {
1322            let mut covers = Vec::new();
1323            for item in list.iter() {
1324                // Try bytes first (most common: [b'\x89PNG...'])
1325                if let Ok(data) = item.extract::<Vec<u8>>() {
1326                    let fmt = if data.starts_with(b"\x89PNG") {
1327                        mp4::MP4CoverFormat::PNG
1328                    } else {
1329                        mp4::MP4CoverFormat::JPEG
1330                    };
1331                    covers.push(mp4::MP4Cover { data, format: fmt });
1332                } else if let Ok(dict) = item.cast::<PyDict>() {
1333                    // Dict with data/format keys
1334                    if let (Some(data_obj), Some(fmt_obj)) = (dict.get_item("data")?, dict.get_item("format")?) {
1335                        let data = data_obj.extract::<Vec<u8>>()?;
1336                        let fmt_int = fmt_obj.extract::<u32>().unwrap_or(13);
1337                        let format = if fmt_int == 14 { mp4::MP4CoverFormat::PNG } else { mp4::MP4CoverFormat::JPEG };
1338                        covers.push(mp4::MP4Cover { data, format });
1339                    }
1340                }
1341            }
1342            if !covers.is_empty() {
1343                return Ok(mp4::MP4TagValue::Cover(covers));
1344            }
1345        }
1346        // Single bytes object
1347        if let Ok(data) = value.extract::<Vec<u8>>() {
1348            let fmt = if data.starts_with(b"\x89PNG") {
1349                mp4::MP4CoverFormat::PNG
1350            } else {
1351                mp4::MP4CoverFormat::JPEG
1352            };
1353            return Ok(mp4::MP4TagValue::Cover(vec![mp4::MP4Cover { data, format: fmt }]));
1354        }
1355    }
1356    // Int pairs (trkn, disk): [(num, total)]
1357    if key == "trkn" || key == "disk" {
1358        if let Ok(pairs) = value.extract::<Vec<(i32, i32)>>() {
1359            return Ok(mp4::MP4TagValue::IntPair(pairs));
1360        }
1361        if let Ok(pair) = value.extract::<(i32, i32)>() {
1362            return Ok(mp4::MP4TagValue::IntPair(vec![pair]));
1363        }
1364    }
1365    // List of strings (most common for text tags)
1366    if let Ok(strings) = value.extract::<Vec<String>>() {
1367        return Ok(mp4::MP4TagValue::Text(strings));
1368    }
1369    // Single string
1370    if let Ok(s) = value.extract::<String>() {
1371        return Ok(mp4::MP4TagValue::Text(vec![s]));
1372    }
1373    // Bool (check before int since bool extracts as int too)
1374    if let Ok(b) = value.extract::<bool>() {
1375        return Ok(mp4::MP4TagValue::Bool(b));
1376    }
1377    // Integer
1378    if let Ok(i) = value.extract::<i64>() {
1379        return Ok(mp4::MP4TagValue::Integer(vec![i]));
1380    }
1381    // List of integers
1382    if let Ok(ints) = value.extract::<Vec<i64>>() {
1383        return Ok(mp4::MP4TagValue::Integer(ints));
1384    }
1385    // Raw bytes
1386    if let Ok(data) = value.extract::<Vec<u8>>() {
1387        return Ok(mp4::MP4TagValue::Data(data));
1388    }
1389    // Freeform: list of bytes
1390    if let Ok(list) = value.cast::<PyList>() {
1391        let mut forms = Vec::new();
1392        for item in list.iter() {
1393            if let Ok(data) = item.extract::<Vec<u8>>() {
1394                forms.push(mp4::MP4FreeForm { data, dataformat: 1 });
1395            }
1396        }
1397        if !forms.is_empty() {
1398            return Ok(mp4::MP4TagValue::FreeForm(forms));
1399        }
1400    }
1401
1402    Err(PyValueError::new_err(format!(
1403        "Cannot convert value for MP4 key '{}': unsupported type", key
1404    )))
1405}
1406
1407// ---- Batch API ----
1408
/// Pre-serialized tag value — all decoding done in parallel phase.
///
/// Holds only owned Rust data (no Python objects), so values can be built
/// without the GIL and wrapped into Python objects later.
#[derive(Clone)]
enum BatchTagValue {
    /// A single string (single-value text frames, URLs, comments, lyrics).
    Text(String),
    /// A list of strings (multi-value text, grouped Vorbis comment values).
    TextList(Vec<String>),
    /// Raw bytes (ID3 binary frames, MP4 `Data` values).
    Bytes(Vec<u8>),
    /// A single integer.
    Int(i64),
    /// A single pair of integers (from an MP4 `IntPair` holding one pair).
    IntPair(i32, i32),
    /// A boolean flag.
    Bool(bool),
    /// An ID3 picture frame: MIME type, picture type code, description, image bytes.
    Picture { mime: String, pic_type: u8, desc: String, data: Vec<u8> },
    /// An ID3 popularimeter frame: email, rating and play count.
    Popularimeter { email: String, rating: u8, count: u64 },
    /// Pairs of strings from an ID3 paired-text frame.
    PairedText(Vec<(String, String)>),
    /// MP4 covers as `(image bytes, format code)` tuples.
    CoverList(Vec<(Vec<u8>, u8)>),
    /// MP4 freeform payloads, one byte vector per entry.
    FreeFormList(Vec<Vec<u8>>),
}
1424
/// Pre-serialized file — all Rust work done, ready for Python wrapping.
///
/// Produced by the per-format `parse_*_batch` functions.
#[derive(Clone)]
struct PreSerializedFile {
    /// Stream duration (the FLAC/OGG parsers compute it as samples / sample rate).
    length: f64,
    /// Sample rate of the stream.
    sample_rate: u32,
    /// Channel count.
    channels: u32,
    /// Bitrate, or `None` when the parser could not determine one.
    bitrate: Option<u32>,
    /// Already-decoded tags as `(key, value)` pairs; left empty by the
    /// FLAC/OGG parsers, which fill `lazy_vc` instead.
    tags: Vec<(String, BatchTagValue)>,
    // Format-specific extra metadata (emitted as dict entries in _fast_read)
    extra: Vec<(&'static str, BatchTagValue)>,
    // Lazy VC tag support: raw Vorbis Comment bytes (copied from file data).
    // When set, tags will be parsed on-demand, skipping String allocation during batch parallel phase.
    lazy_vc: Option<Vec<u8>>,
}
1439
1440/// Convert a Frame to a BatchTagValue (runs in parallel phase, no GIL needed).
1441#[inline(always)]
1442fn frame_to_batch_value(frame: &id3::frames::Frame) -> BatchTagValue {
1443    match frame {
1444        id3::frames::Frame::Text(f) => {
1445            if f.text.len() == 1 {
1446                BatchTagValue::Text(f.text[0].clone())
1447            } else {
1448                BatchTagValue::TextList(f.text.clone())
1449            }
1450        }
1451        id3::frames::Frame::UserText(f) => {
1452            if f.text.len() == 1 {
1453                BatchTagValue::Text(f.text[0].clone())
1454            } else {
1455                BatchTagValue::TextList(f.text.clone())
1456            }
1457        }
1458        id3::frames::Frame::Url(f) => BatchTagValue::Text(f.url.clone()),
1459        id3::frames::Frame::UserUrl(f) => BatchTagValue::Text(f.url.clone()),
1460        id3::frames::Frame::Comment(f) => BatchTagValue::Text(f.text.clone()),
1461        id3::frames::Frame::Lyrics(f) => BatchTagValue::Text(f.text.clone()),
1462        id3::frames::Frame::Picture(f) => BatchTagValue::Picture {
1463            mime: f.mime.clone(),
1464            pic_type: f.pic_type as u8,
1465            desc: f.desc.clone(),
1466            data: f.data.clone(),
1467        },
1468        id3::frames::Frame::Popularimeter(f) => BatchTagValue::Popularimeter {
1469            email: f.email.clone(),
1470            rating: f.rating,
1471            count: f.count,
1472        },
1473        id3::frames::Frame::Binary(f) => BatchTagValue::Bytes(f.data.clone()),
1474        id3::frames::Frame::PairedText(f) => BatchTagValue::PairedText(f.people.clone()),
1475    }
1476}
1477
1478/// Parse VorbisComment data directly into batch tags — single-pass, minimal allocations.
1479/// Skips vendor string, uses memchr for fast '=' finding, groups by key inline.
1480#[inline(always)]
1481fn parse_vc_to_batch_tags(data: &[u8]) -> Vec<(String, BatchTagValue)> {
1482    if data.len() < 8 { return Vec::new(); }
1483    let mut pos = 0usize;
1484
1485    // Skip vendor string
1486    let vendor_len = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
1487    pos += 4;
1488    if pos + vendor_len > data.len() { return Vec::new(); }
1489    pos += vendor_len;
1490
1491    if pos + 4 > data.len() { return Vec::new(); }
1492    let count = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
1493    pos += 4;
1494
1495    let mut tags: Vec<(String, BatchTagValue)> = Vec::with_capacity(count.min(64));
1496
1497    for _ in 0..count {
1498        if pos + 4 > data.len() { break; }
1499        let comment_len = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
1500        pos += 4;
1501        if pos + comment_len > data.len() { break; }
1502
1503        let raw = &data[pos..pos + comment_len];
1504        pos += comment_len;
1505
1506        // Find '=' separator using memchr (SIMD-accelerated)
1507        let eq_pos = match memchr::memchr(b'=', raw) {
1508            Some(p) => p,
1509            None => continue,
1510        };
1511
1512        let key_bytes = &raw[..eq_pos];
1513        let value_bytes = &raw[eq_pos + 1..];
1514
1515        // Key: lowercase ASCII (matches mutagen behavior)
1516        let key = if key_bytes.iter().all(|&b| !b.is_ascii_uppercase()) {
1517            match std::str::from_utf8(key_bytes) {
1518                Ok(s) => s.to_string(),
1519                Err(_) => continue,
1520            }
1521        } else {
1522            // Fast ASCII lowercase (no allocation for checking)
1523            let mut k = String::with_capacity(key_bytes.len());
1524            for &b in key_bytes {
1525                k.push(if b.is_ascii_uppercase() { (b + 32) as char } else { b as char });
1526            }
1527            k
1528        };
1529
1530        // Value: zero-copy if valid UTF-8
1531        let value = match std::str::from_utf8(value_bytes) {
1532            Ok(s) => s.to_string(),
1533            Err(_) => String::from_utf8_lossy(value_bytes).into_owned(),
1534        };
1535
1536        // Group by key (linear scan — fast for typical 5-15 unique keys)
1537        if let Some(entry) = tags.iter_mut().find(|(k, _)| k == &key) {
1538            if let BatchTagValue::TextList(ref mut v) = entry.1 {
1539                v.push(value);
1540            }
1541        } else {
1542            tags.push((key, BatchTagValue::TextList(vec![value])));
1543        }
1544    }
1545
1546    tags
1547}
1548
1549/// Batch-optimized FLAC parser: skips pictures, direct VC parsing.
1550#[inline(always)]
1551fn parse_flac_batch(data: &[u8], file_size: usize) -> Option<PreSerializedFile> {
1552    let flac_offset = if data.len() >= 4 && &data[0..4] == b"fLaC" {
1553        0
1554    } else if data.len() >= 10 && &data[0..3] == b"ID3" {
1555        let size = crate::id3::header::BitPaddedInt::syncsafe(&data[6..10]) as usize;
1556        let off = 10 + size;
1557        if off + 4 > data.len() || &data[off..off+4] != b"fLaC" { return None; }
1558        off
1559    } else {
1560        return None;
1561    };
1562
1563    let mut pos = flac_offset + 4;
1564    let mut sample_rate = 0u32;
1565    let mut channels = 0u8;
1566    let mut length = 0.0f64;
1567    let mut bits_per_sample = 0u8;
1568    let mut total_samples = 0u64;
1569    let mut vc_pos: Option<(usize, usize)> = None;
1570
1571    loop {
1572        if pos + 4 > data.len() { break; }
1573        let header = data[pos];
1574        let is_last = header & 0x80 != 0;
1575        let bt = header & 0x7F;
1576        let block_size = ((data[pos+1] as usize) << 16) | ((data[pos+2] as usize) << 8) | (data[pos+3] as usize);
1577        pos += 4;
1578        if pos + block_size > data.len() { break; }
1579
1580        match bt {
1581            0 => {
1582                if let Ok(si) = flac::StreamInfo::parse(&data[pos..pos+block_size]) {
1583                    sample_rate = si.sample_rate;
1584                    channels = si.channels;
1585                    length = si.length;
1586                    bits_per_sample = si.bits_per_sample;
1587                    total_samples = si.total_samples;
1588                }
1589            }
1590            4 => {
1591                // Compute actual VC size from internal lengths (handles incorrect block_size headers)
1592                let vc_size = flac::compute_vc_data_size(&data[pos..]).unwrap_or(block_size);
1593                vc_pos = Some((pos, vc_size));
1594            }
1595            _ => {}
1596        }
1597
1598        pos += block_size;
1599        // Early break: we only need StreamInfo + VC, skip remaining blocks
1600        if is_last || (sample_rate > 0 && vc_pos.is_some()) { break; }
1601    }
1602
1603    if sample_rate == 0 { return None; }
1604
1605    // Lazy VC: copy just the VC raw bytes (typically 100-1000 bytes), defer parsing to access time.
1606    // This avoids ~15 String allocations per file during the rayon parallel phase.
1607    let lazy_vc = vc_pos.map(|(off, sz)| data[off..off.saturating_add(sz).min(data.len())].to_vec());
1608
1609    // Bitrate: use audio data size only (exclude metadata), matching mutagen behavior
1610    // pos points to the start of audio frames after the metadata block loop
1611    let audio_data_size = file_size.saturating_sub(pos);
1612    let bitrate = if length > 0.0 {
1613        Some((audio_data_size as f64 * 8.0 / length) as u32)
1614    } else { None };
1615
1616    Some(PreSerializedFile {
1617        length,
1618        sample_rate,
1619        channels: channels as u32,
1620        bitrate,
1621        tags: Vec::new(),
1622        extra: vec![
1623            ("bits_per_sample", BatchTagValue::Int(bits_per_sample as i64)),
1624            ("total_samples", BatchTagValue::Int(total_samples as i64)),
1625        ],
1626        lazy_vc,
1627    })
1628}
1629
/// Batch-optimized OGG Vorbis parser: inline page headers, direct VC parsing.
///
/// Reads the first page (Vorbis identification packet) for channels, sample
/// rate and nominal bitrate, then takes the comment packet from the page that
/// starts immediately after it. The comment payload (everything after the
/// 7-byte `\x03vorbis` marker) is copied into `lazy_vc` without being decoded.
///
/// Returns `None` when the data is shorter than 58 bytes, does not start with
/// `OggS`, the first packet is not a Vorbis identification header, the second
/// page is missing, or the comment packet is absent or malformed.
#[inline(always)]
fn parse_ogg_batch(data: &[u8]) -> Option<PreSerializedFile> {
    if data.len() < 58 || &data[0..4] != b"OggS" { return None; }

    // First page header: stream serial at bytes 14..18, segment count at byte 26,
    // segment table starting at byte 27.
    let serial = u32::from_le_bytes([data[14], data[15], data[16], data[17]]);
    let num_seg = data[26] as usize;
    let seg_table_end = 27 + num_seg;
    if seg_table_end > data.len() { return None; }

    // Page payload size is the sum of the segment table entries.
    let page_data_size: usize = data[27..seg_table_end].iter().map(|&s| s as usize).sum();
    let first_page_end = seg_table_end + page_data_size;

    // The identification packet must provide at least 30 bytes and start with "\x01vorbis".
    if seg_table_end + 30 > data.len() { return None; }
    let id_data = &data[seg_table_end..];
    if id_data.len() < 30 || &id_data[0..7] != b"\x01vorbis" { return None; }

    // Field offsets within the identification packet.
    let channels = id_data[11];
    let sample_rate = u32::from_le_bytes([id_data[12], id_data[13], id_data[14], id_data[15]]);
    let nominal_bitrate = u32::from_le_bytes([id_data[20], id_data[21], id_data[22], id_data[23]]);

    // A second page must begin exactly where the first one ends.
    if first_page_end + 27 > data.len() { return None; }
    if &data[first_page_end..first_page_end+4] != b"OggS" { return None; }

    // Try fast single-page path first (zero-copy), fall back to multi-page assembly
    let seg2_count = data[first_page_end + 26] as usize;
    let seg2_table_start = first_page_end + 27;
    let seg2_table_end = seg2_table_start + seg2_count;
    if seg2_table_end > data.len() { return None; }

    // Sum segment sizes up to and including the first one below 255; such a
    // segment marks the end of the packet, so the packet fits in this page.
    // If every segment is 255 (or there are none), the slow path is taken.
    let seg2_table = &data[seg2_table_start..seg2_table_end];
    let mut first_packet_size = 0usize;
    let mut single_page = false;
    for &seg in seg2_table {
        first_packet_size += seg as usize;
        if seg < 255 { single_page = true; break; }
    }

    // Duration = last granule position / sample rate; 0.0 when either is
    // unavailable or zero.
    let length = ogg::find_last_granule(data, serial)
        .map(|g| if g > 0 && sample_rate > 0 { g as f64 / sample_rate as f64 } else { 0.0 })
        .unwrap_or(0.0);

    // Prefer the header's nominal bitrate; otherwise derive an average from the
    // total data size and the duration.
    let bitrate = if nominal_bitrate > 0 {
        Some(nominal_bitrate)
    } else if length > 0.0 {
        Some((data.len() as f64 * 8.0 / length) as u32)
    } else { None };

    let lazy_vc = if single_page {
        // Fast path: packet fits in one page, zero-copy
        let comment_start = seg2_table_end;
        if comment_start + first_packet_size > data.len() { return None; }
        if first_packet_size < 7 { return None; }
        if &data[comment_start..comment_start+7] != b"\x03vorbis" { return None; }
        // Keep only the payload after the 7-byte packet marker.
        Some(data[comment_start + 7..comment_start + first_packet_size].to_vec())
    } else {
        // Slow path: multi-page assembly
        let comment_packet = ogg::ogg_assemble_first_packet(data, first_page_end)?;
        if comment_packet.len() < 7 { return None; }
        if &comment_packet[0..7] != b"\x03vorbis" { return None; }
        Some(comment_packet[7..].to_vec())
    };

    Some(PreSerializedFile {
        length,
        sample_rate,
        channels: channels as u32,
        bitrate,
        tags: Vec::new(),
        extra: Vec::new(),
        lazy_vc,
    })
}
1703
1704/// Convert MP4TagValue to BatchTagValue (inline, no extra lookup).
1705#[inline(always)]
1706fn mp4_value_to_batch(value: &mp4::MP4TagValue) -> BatchTagValue {
1707    match value {
1708        mp4::MP4TagValue::Text(v) => {
1709            if v.len() == 1 { BatchTagValue::Text(v[0].clone()) }
1710            else { BatchTagValue::TextList(v.clone()) }
1711        }
1712        mp4::MP4TagValue::Integer(v) => {
1713            if v.len() == 1 { BatchTagValue::Int(v[0] as i64) }
1714            else { BatchTagValue::TextList(v.iter().map(|i| itoa::Buffer::new().format(*i).to_string()).collect()) }
1715        }
1716        mp4::MP4TagValue::IntPair(v) => {
1717            if v.len() == 1 { BatchTagValue::IntPair(v[0].0, v[0].1) }
1718            else { BatchTagValue::TextList(v.iter().map(|(a,b)| { let mut s = String::with_capacity(12); s.push('('); s.push_str(itoa::Buffer::new().format(*a)); s.push(','); s.push_str(itoa::Buffer::new().format(*b)); s.push(')'); s }).collect()) }
1719        }
1720        mp4::MP4TagValue::Bool(v) => BatchTagValue::Bool(*v),
1721        mp4::MP4TagValue::Cover(covers) => {
1722            BatchTagValue::CoverList(covers.iter().map(|c| (c.data.clone(), c.format as u8)).collect())
1723        }
1724        mp4::MP4TagValue::FreeForm(forms) => {
1725            BatchTagValue::FreeFormList(forms.iter().map(|f| f.data.clone()).collect())
1726        }
1727        mp4::MP4TagValue::Data(d) => BatchTagValue::Bytes(d.clone()),
1728    }
1729}
1730
1731/// Parse MP3 data into batch result.
1732#[inline(always)]
1733fn parse_mp3_batch(data: &[u8], path: &str) -> Option<PreSerializedFile> {
1734    let mut f = mp3::MP3File::parse(data, path).ok()?;
1735    f.ensure_tags_parsed(data);
1736    let mut tags = Vec::with_capacity(f.tags.frames.len());
1737    let mut has_tdrc = f.tags.frames.iter().any(|(k, _)| k.as_str() == "TDRC");
1738    for (hash_key, frames) in f.tags.frames.iter_mut() {
1739        if let Some(lf) = frames.first_mut() {
1740            if let Ok(frame) = lf.decode_with_buf(&f.tags.raw_buf) {
1741                let key = hash_key.as_str();
1742                // TYER→TDRC normalization (matches mutagen behavior)
1743                if key == "TYER" {
1744                    if has_tdrc { continue; }
1745                    has_tdrc = true;
1746                    tags.push(("TDRC".to_string(), frame_to_batch_value(frame)));
1747                } else {
1748                    tags.push((key.to_string(), frame_to_batch_value(frame)));
1749                }
1750            }
1751        }
1752    }
1753    // MP3-specific extra metadata
1754    let extra = vec![
1755        ("version", BatchTagValue::Text(ryu::Buffer::new().format(f.info.version).to_string())),
1756        ("layer", BatchTagValue::Int(f.info.layer as i64)),
1757        ("mode", BatchTagValue::Int(f.info.mode as i64)),
1758        ("protected", BatchTagValue::Bool(f.info.protected)),
1759        ("bitrate_mode", BatchTagValue::Int(match f.info.bitrate_mode {
1760            mp3::xing::BitrateMode::Unknown => 0,
1761            mp3::xing::BitrateMode::CBR => 1,
1762            mp3::xing::BitrateMode::VBR => 2,
1763            mp3::xing::BitrateMode::ABR => 3,
1764        })),
1765    ];
1766    Some(PreSerializedFile {
1767        length: f.info.length,
1768        sample_rate: f.info.sample_rate,
1769        channels: f.info.channels,
1770        bitrate: Some(f.info.bitrate),
1771        tags,
1772        extra,
1773        lazy_vc: None,
1774    })
1775}
1776
1777/// Parse MP4 data into batch result.
1778#[inline(always)]
1779fn parse_mp4_batch(data: &[u8], path: &str) -> Option<PreSerializedFile> {
1780    let mut f = mp4::MP4File::parse(data, path).ok()?;
1781    f.ensure_parsed_with_data(data);
1782    let mut tags = Vec::with_capacity(f.tags.items.len());
1783    for (key, value) in f.tags.items.iter() {
1784        tags.push((key.clone(), mp4_value_to_batch(value)));
1785    }
1786    let extra = vec![
1787        ("codec", BatchTagValue::Text(f.info.codec.clone())),
1788        ("bits_per_sample", BatchTagValue::Int(f.info.bits_per_sample as i64)),
1789    ];
1790    Some(PreSerializedFile {
1791        length: f.info.length,
1792        sample_rate: f.info.sample_rate,
1793        channels: f.info.channels as u32,
1794        bitrate: if f.info.bitrate > 0 { Some(f.info.bitrate) } else { None },
1795        tags,
1796        extra,
1797        lazy_vc: None,
1798    })
1799}
1800
1801/// Parse + fully decode a single file from data (runs in parallel phase).
1802/// Uses extension-based fast dispatch to skip unnecessary scoring.
1803#[inline(always)]
1804fn parse_and_serialize(data: &[u8], path: &str) -> Option<PreSerializedFile> {
1805    let ext = path.rsplit('.').next().unwrap_or("");
1806    if ext.eq_ignore_ascii_case("flac") {
1807        return parse_flac_batch(data, data.len());
1808    }
1809    if ext.eq_ignore_ascii_case("ogg") {
1810        return parse_ogg_batch(data);
1811    }
1812    if ext.eq_ignore_ascii_case("mp3") {
1813        return parse_mp3_batch(data, path);
1814    }
1815    if ext.eq_ignore_ascii_case("m4a") || ext.eq_ignore_ascii_case("m4b")
1816        || ext.eq_ignore_ascii_case("mp4") || ext.eq_ignore_ascii_case("m4v") {
1817        return parse_mp4_batch(data, path);
1818    }
1819
1820    let mp3_score = mp3::MP3File::score(path, data);
1821    let flac_score = flac::FLACFile::score(path, data);
1822    let ogg_score = ogg::OggVorbisFile::score(path, data);
1823    let mp4_score = mp4::MP4File::score(path, data);
1824    let max_score = mp3_score.max(flac_score).max(ogg_score).max(mp4_score);
1825
1826    if max_score == 0 {
1827        return None;
1828    }
1829
1830    if max_score == flac_score {
1831        parse_flac_batch(data, data.len())
1832    } else if max_score == ogg_score {
1833        parse_ogg_batch(data)
1834    } else if max_score == mp4_score {
1835        parse_mp4_batch(data, path)
1836    } else {
1837        parse_mp3_batch(data, path)
1838    }
1839}
1840
1841/// Convert pre-serialized BatchTagValue to Python object (minimal serial work).
1842#[inline(always)]
1843fn batch_value_to_py(py: Python<'_>, bv: &BatchTagValue) -> PyResult<Py<PyAny>> {
1844    match bv {
1845        BatchTagValue::Text(s) => Ok(s.as_str().into_pyobject(py)?.into_any().unbind()),
1846        BatchTagValue::TextList(v) => Ok(PyList::new(py, v)?.into_any().unbind()),
1847        BatchTagValue::Bytes(d) => Ok(PyBytes::new(py, d).into_any().unbind()),
1848        BatchTagValue::Int(i) => Ok(i.into_pyobject(py)?.into_any().unbind()),
1849        BatchTagValue::IntPair(a, b) => Ok(PyTuple::new(py, &[*a, *b])?.into_any().unbind()),
1850        BatchTagValue::Bool(v) => Ok((*v).into_pyobject(py)?.to_owned().into_any().unbind()),
1851        BatchTagValue::Picture { mime, pic_type, desc, data } => {
1852            let dict = PyDict::new(py);
1853            dict.set_item(pyo3::intern!(py, "mime"), mime.as_str())?;
1854            dict.set_item(pyo3::intern!(py, "type"), *pic_type)?;
1855            dict.set_item(pyo3::intern!(py, "desc"), desc.as_str())?;
1856            dict.set_item(pyo3::intern!(py, "data"), PyBytes::new(py, data))?;
1857            Ok(dict.into_any().unbind())
1858        }
1859        BatchTagValue::Popularimeter { email, rating, count } => {
1860            Ok(Py::new(py, PyPOPM {
1861                email: email.clone(),
1862                rating: *rating,
1863                count: *count,
1864            })?.into_any())
1865        }
1866        BatchTagValue::PairedText(pairs) => {
1867            let py_pairs: Vec<(&str, &str)> = pairs.iter().map(|(a, b)| (a.as_str(), b.as_str())).collect();
1868            Ok(PyList::new(py, &py_pairs)?.into_any().unbind())
1869        }
1870        BatchTagValue::CoverList(covers) => {
1871            let list = PyList::empty(py);
1872            for (data, format) in covers {
1873                let dict = PyDict::new(py);
1874                dict.set_item(pyo3::intern!(py, "data"), PyBytes::new(py, data))?;
1875                dict.set_item(pyo3::intern!(py, "format"), *format)?;
1876                list.append(dict)?;
1877            }
1878            Ok(list.into_any().unbind())
1879        }
1880        BatchTagValue::FreeFormList(forms) => {
1881            let list = PyList::empty(py);
1882            for data in forms {
1883                list.append(PyBytes::new(py, data))?;
1884            }
1885            Ok(list.into_any().unbind())
1886        }
1887    }
1888}
1889
1890/// Convert BatchTagValue to raw *mut PyObject (bypasses PyO3 wrappers for speed).
1891/// Returns new reference. Caller must Py_DECREF.
1892#[inline(always)]
1893unsafe fn batch_value_to_py_ffi(py: Python<'_>, bv: &BatchTagValue) -> *mut pyo3::ffi::PyObject {
1894    match bv {
1895        BatchTagValue::Text(s) => {
1896            pyo3::ffi::PyUnicode_FromStringAndSize(
1897                s.as_ptr() as *const std::ffi::c_char,
1898                s.len() as pyo3::ffi::Py_ssize_t)
1899        }
1900        BatchTagValue::TextList(v) => {
1901            let list = pyo3::ffi::PyList_New(v.len() as pyo3::ffi::Py_ssize_t);
1902            if list.is_null() { return std::ptr::null_mut(); }
1903            for (i, s) in v.iter().enumerate() {
1904                let obj = pyo3::ffi::PyUnicode_FromStringAndSize(
1905                    s.as_ptr() as *const std::ffi::c_char,
1906                    s.len() as pyo3::ffi::Py_ssize_t);
1907                pyo3::ffi::PyList_SET_ITEM(list, i as pyo3::ffi::Py_ssize_t, obj); // steals ref
1908            }
1909            list
1910        }
1911        BatchTagValue::Bytes(d) => {
1912            pyo3::ffi::PyBytes_FromStringAndSize(
1913                d.as_ptr() as *const std::ffi::c_char,
1914                d.len() as pyo3::ffi::Py_ssize_t)
1915        }
1916        BatchTagValue::Int(i) => pyo3::ffi::PyLong_FromLongLong(*i),
1917        BatchTagValue::IntPair(a, b) => {
1918            // Fall back to PyO3 for tuple creation (rare path)
1919            match PyTuple::new(py, &[*a, *b]) {
1920                Ok(t) => { let ptr = t.as_ptr(); pyo3::ffi::Py_INCREF(ptr); ptr }
1921                Err(_) => std::ptr::null_mut()
1922            }
1923        }
1924        BatchTagValue::Bool(v) => {
1925            if *v { pyo3::ffi::Py_INCREF(pyo3::ffi::Py_True()); pyo3::ffi::Py_True() }
1926            else { pyo3::ffi::Py_INCREF(pyo3::ffi::Py_False()); pyo3::ffi::Py_False() }
1927        }
1928        // Complex types: fall back to PyO3 (rare paths, not worth optimizing)
1929        _ => {
1930            match batch_value_to_py(py, bv) {
1931                Ok(obj) => { let ptr = obj.as_ptr(); pyo3::ffi::Py_INCREF(ptr); ptr }
1932                Err(_) => std::ptr::null_mut()
1933            }
1934        }
1935    }
1936}
1937
1938/// Convert pre-serialized file to Python dict using raw CPython FFI (faster than PyO3 wrappers).
1939#[inline(always)]
1940fn preserialized_to_py_dict(py: Python<'_>, pf: &PreSerializedFile) -> PyResult<Py<PyAny>> {
1941    unsafe {
1942        let inner = pyo3::ffi::PyDict_New();
1943        if inner.is_null() {
1944            return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
1945        }
1946        set_dict_f64(inner, pyo3::intern!(py, "length").as_ptr(), pf.length);
1947        set_dict_u32(inner, pyo3::intern!(py, "sample_rate").as_ptr(), pf.sample_rate);
1948        set_dict_u32(inner, pyo3::intern!(py, "channels").as_ptr(), pf.channels);
1949        if let Some(br) = pf.bitrate {
1950            set_dict_u32(inner, pyo3::intern!(py, "bitrate").as_ptr(), br);
1951        }
1952        // Direct VC→Python FFI path: skip Rust String intermediary for lazy VC
1953        if pf.tags.is_empty() {
1954            if let Some(ref vc_bytes) = pf.lazy_vc {
1955                let tags_dict = pyo3::ffi::PyDict_New();
1956                if !tags_dict.is_null() {
1957                    parse_vc_to_ffi_dict(vc_bytes, tags_dict);
1958                    pyo3::ffi::PyDict_SetItem(inner, pyo3::intern!(py, "tags").as_ptr(), tags_dict);
1959                    pyo3::ffi::Py_DECREF(tags_dict);
1960                }
1961            } else {
1962                // Empty tags
1963                let tags_dict = pyo3::ffi::PyDict_New();
1964                if !tags_dict.is_null() {
1965                    pyo3::ffi::PyDict_SetItem(inner, pyo3::intern!(py, "tags").as_ptr(), tags_dict);
1966                    pyo3::ffi::Py_DECREF(tags_dict);
1967                }
1968            }
1969        } else {
1970            let tags_dict = pyo3::ffi::PyDict_New();
1971            if !tags_dict.is_null() {
1972                for (key, value) in &pf.tags {
1973                    let key_ptr = intern_tag_key(key.as_bytes());
1974                    if key_ptr.is_null() { continue; }
1975                    let val_ptr = batch_value_to_py_ffi(py, value);
1976                    if !val_ptr.is_null() {
1977                        pyo3::ffi::PyDict_SetItem(tags_dict, key_ptr, val_ptr);
1978                        pyo3::ffi::Py_DECREF(val_ptr);
1979                    }
1980                    pyo3::ffi::Py_DECREF(key_ptr);
1981                }
1982                pyo3::ffi::PyDict_SetItem(inner, pyo3::intern!(py, "tags").as_ptr(), tags_dict);
1983                pyo3::ffi::Py_DECREF(tags_dict);
1984            }
1985        }
1986        Ok(Bound::from_owned_ptr(py, inner).unbind())
1987    }
1988}
1989
1990/// Parse VC bytes directly into a Python dict using raw FFI.
1991/// Skips intermediate Rust String allocations — goes from raw bytes straight to Python objects.
1992/// Values are wrapped in lists (VC format: duplicate keys are merged into a single list).
1993#[inline(always)]
1994unsafe fn parse_vc_to_ffi_dict(data: &[u8], tags_dict: *mut pyo3::ffi::PyObject) {
1995    if data.len() < 8 { return; }
1996    let mut pos = 0;
1997    let vendor_len = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
1998    pos += 4;
1999    if pos + vendor_len > data.len() { return; }
2000    pos += vendor_len;
2001    if pos + 4 > data.len() { return; }
2002    let count = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
2003    pos += 4;
2004
2005    for _ in 0..count.min(256) {
2006        if pos + 4 > data.len() { break; }
2007        let clen = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
2008        pos += 4;
2009        if pos + clen > data.len() { break; }
2010        let raw = &data[pos..pos + clen];
2011        pos += clen;
2012
2013        let eq_pos = match memchr::memchr(b'=', raw) {
2014            Some(p) => p,
2015            None => continue,
2016        };
2017        let key_bytes = &raw[..eq_pos];
2018        let value_bytes = &raw[eq_pos + 1..];
2019
2020        // Lowercase key into stack buffer (matches mutagen behavior)
2021        let mut buf = [0u8; 128];
2022        let key_len = key_bytes.len().min(128);
2023        for i in 0..key_len { buf[i] = key_bytes[i].to_ascii_lowercase(); }
2024
2025        let key_ptr = intern_tag_key(&buf[..key_len]);
2026        if key_ptr.is_null() { pyo3::ffi::PyErr_Clear(); continue; }
2027
2028        let val_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
2029            value_bytes.as_ptr() as *const std::ffi::c_char,
2030            value_bytes.len() as pyo3::ffi::Py_ssize_t);
2031        if val_ptr.is_null() {
2032            pyo3::ffi::PyErr_Clear();
2033            pyo3::ffi::Py_DECREF(key_ptr);
2034            continue;
2035        }
2036
2037        // Check for duplicate keys (merge into list)
2038        let existing = pyo3::ffi::PyDict_GetItem(tags_dict, key_ptr);
2039        if !existing.is_null() {
2040            if pyo3::ffi::PyList_Check(existing) != 0 {
2041                pyo3::ffi::PyList_Append(existing, val_ptr);
2042                pyo3::ffi::Py_DECREF(val_ptr);
2043            } else {
2044                let list = pyo3::ffi::PyList_New(2);
2045                pyo3::ffi::Py_INCREF(existing);
2046                pyo3::ffi::PyList_SET_ITEM(list, 0, existing);
2047                pyo3::ffi::PyList_SET_ITEM(list, 1, val_ptr);
2048                pyo3::ffi::PyDict_SetItem(tags_dict, key_ptr, list);
2049                pyo3::ffi::Py_DECREF(list);
2050            }
2051            pyo3::ffi::Py_DECREF(key_ptr);
2052        } else {
2053            // New key: wrap value in single-element list
2054            let list = pyo3::ffi::PyList_New(1);
2055            pyo3::ffi::PyList_SET_ITEM(list, 0, val_ptr);
2056            pyo3::ffi::PyDict_SetItem(tags_dict, key_ptr, list);
2057            pyo3::ffi::Py_DECREF(list);
2058            pyo3::ffi::Py_DECREF(key_ptr);
2059        }
2060    }
2061}
2062
/// Append `s` to `out` as a double-quoted JSON string literal.
///
/// `"` and `\` get a backslash, newline/carriage-return/tab use their short
/// escapes, and every other byte below 0x20 is written as `\u00xx`. All other
/// characters, including non-ASCII ones, are copied through unchanged.
#[allow(dead_code)]
#[inline(always)]
fn json_escape_to(s: &str, out: &mut String) {
    out.push('"');

    // Everything that needs escaping is a single ASCII byte, so a byte scan
    // can copy the untouched stretches as whole slices between escapes.
    let mut run_start = 0usize;
    for (at, byte) in s.bytes().enumerate() {
        let short_escape = match byte {
            b'"' => "\\\"",
            b'\\' => "\\\\",
            b'\n' => "\\n",
            b'\r' => "\\r",
            b'\t' => "\\t",
            0x00..=0x1f => {
                out.push_str(&s[run_start..at]);
                out.push_str(&format!("\\u{:04x}", byte));
                run_start = at + 1;
                continue;
            }
            _ => continue,
        };
        out.push_str(&s[run_start..at]);
        out.push_str(short_escape);
        run_start = at + 1;
    }
    out.push_str(&s[run_start..]);

    out.push('"');
}
2088
2089/// Serialize a BatchTagValue to a JSON fragment.
2090#[allow(dead_code)]
2091#[inline(always)]
2092fn batch_value_to_json(bv: &BatchTagValue, out: &mut String) {
2093    match bv {
2094        BatchTagValue::Text(s) => json_escape_to(s, out),
2095        BatchTagValue::TextList(v) => {
2096            out.push('[');
2097            for (i, s) in v.iter().enumerate() {
2098                if i > 0 { out.push(','); }
2099                json_escape_to(s, out);
2100            }
2101            out.push(']');
2102        }
2103        BatchTagValue::Int(i) => {
2104            write_int(out, *i);
2105        }
2106        BatchTagValue::IntPair(a, b) => {
2107            out.push('[');
2108            write_int(out, *a);
2109            out.push(',');
2110            write_int(out, *b);
2111            out.push(']');
2112        }
2113        BatchTagValue::Bool(v) => {
2114            out.push_str(if *v { "true" } else { "false" });
2115        }
2116        BatchTagValue::PairedText(pairs) => {
2117            out.push('[');
2118            for (i, (a, b)) in pairs.iter().enumerate() {
2119                if i > 0 { out.push(','); }
2120                out.push('[');
2121                json_escape_to(a, out);
2122                out.push(',');
2123                json_escape_to(b, out);
2124                out.push(']');
2125            }
2126            out.push(']');
2127        }
2128        // Binary data types: serialize as null (skip in JSON mode)
2129        BatchTagValue::Bytes(_) | BatchTagValue::Picture { .. } |
2130        BatchTagValue::Popularimeter { .. } | BatchTagValue::CoverList(_) |
2131        BatchTagValue::FreeFormList(_) => {
2132            out.push_str("null");
2133        }
2134    }
2135}
2136
2137/// Write an integer to a string using itoa (faster than format!).
2138#[allow(dead_code)]
2139#[inline(always)]
2140fn write_int(out: &mut String, v: impl itoa::Integer) {
2141    let mut buf = itoa::Buffer::new();
2142    out.push_str(buf.format(v));
2143}
2144
2145/// Write a float to a string using ryu (faster than format!).
2146#[allow(dead_code)]
2147#[inline(always)]
2148fn write_float(out: &mut String, v: f64) {
2149    let mut buf = ryu::Buffer::new();
2150    out.push_str(buf.format(v));
2151}
2152
2153/// Serialize a PreSerializedFile to a JSON object string.
2154#[allow(dead_code)]
2155#[inline(always)]
2156fn preserialized_to_json(pf: &PreSerializedFile, out: &mut String) {
2157    out.push_str("{\"length\":");
2158    write_float(out, pf.length);
2159    out.push_str(",\"sample_rate\":");
2160    write_int(out, pf.sample_rate);
2161    out.push_str(",\"channels\":");
2162    write_int(out, pf.channels);
2163    if let Some(br) = pf.bitrate {
2164        out.push_str(",\"bitrate\":");
2165        write_int(out, br);
2166    }
2167    // Materialize lazy VC tags if needed
2168    let lazy_tags;
2169    let tags = if pf.tags.is_empty() {
2170        if let Some(ref vc_bytes) = pf.lazy_vc {
2171            lazy_tags = parse_vc_to_batch_tags(vc_bytes);
2172            &lazy_tags
2173        } else {
2174            &pf.tags
2175        }
2176    } else {
2177        &pf.tags
2178    };
2179    out.push_str(",\"tags\":{");
2180    let mut first = true;
2181    for (key, value) in tags {
2182        if matches!(value, BatchTagValue::Bytes(_) | BatchTagValue::Picture { .. } |
2183            BatchTagValue::Popularimeter { .. } | BatchTagValue::CoverList(_) |
2184            BatchTagValue::FreeFormList(_)) {
2185            continue;
2186        }
2187        if !first { out.push(','); }
2188        first = false;
2189        json_escape_to(key, out);
2190        out.push(':');
2191        batch_value_to_json(value, out);
2192    }
2193    out.push_str("}}");
2194}
2195
2196/// Lazy batch result — stores parsed Rust data, creates Python objects on demand.
2197/// Uses HashMap for O(1) path lookup instead of O(n) linear search.
2198#[pyclass(name = "BatchResult")]
2199struct PyBatchResult {
2200    paths: Vec<String>,
2201    /// Pre-materialized dict templates (one per dedup group, shared via clone_ref).
2202    /// __getitem__ returns PyDict_Copy of these — no Mutex, no HashMap lookup.
2203    dicts: Vec<Py<PyAny>>,
2204    index: HashMap<String, usize>,
2205}
2206
2207#[pymethods]
2208impl PyBatchResult {
2209    fn __len__(&self) -> usize {
2210        self.paths.len()
2211    }
2212
2213    fn keys(&self) -> Vec<String> {
2214        self.paths.clone()
2215    }
2216
2217    fn __contains__(&self, path: &str) -> bool {
2218        self.index.contains_key(path)
2219    }
2220
2221    fn __getitem__(&self, py: Python<'_>, path: &str) -> PyResult<Py<PyAny>> {
2222        if let Some(&idx) = self.index.get(path) {
2223            return Ok(self.dicts[idx].clone_ref(py));
2224        }
2225        Err(PyKeyError::new_err(path.to_string()))
2226    }
2227
2228    fn items(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
2229        let list = PyList::empty(py);
2230        for (i, p) in self.paths.iter().enumerate() {
2231            unsafe {
2232                let copy = pyo3::ffi::PyDict_Copy(self.dicts[i].as_ptr());
2233                if copy.is_null() { continue; }
2234                let dict_obj = Bound::from_owned_ptr(py, copy);
2235                let tuple = PyTuple::new(py, &[p.as_str().into_pyobject(py)?.into_any(), dict_obj.into_any()])?;
2236                list.append(tuple)?;
2237            }
2238        }
2239        Ok(list.into_any().unbind())
2240    }
2241}
2242
2243/// Batch I/O helper (Unix): uses fstatat/openat/pread for maximum performance.
2244#[cfg(unix)]
2245fn batch_open_io(filenames: &[String], exts: &[&str]) -> Vec<(usize, Arc<PreSerializedFile>)> {
2246    use rayon::prelude::*;
2247    let n = filenames.len();
2248    if n == 0 { return Vec::new(); }
2249
2250    // Check if all files share the same directory — use openat for faster opens.
2251    let common_dir = if !filenames.is_empty() {
2252        let first_dir = filenames[0].rsplit_once('/').map(|(d, _)| d);
2253        if let Some(dir) = first_dir {
2254            if filenames.iter().all(|p| p.rsplit_once('/').map(|(d, _)| d) == Some(dir)) {
2255                Some(dir.to_string())
2256            } else { None }
2257        } else { None }
2258    } else { None };
2259
2260    let (c_names, dir_fd): (Vec<std::ffi::CString>, i32) = if let Some(ref dir) = common_dir {
2261        let names: Vec<std::ffi::CString> = filenames.iter()
2262            .map(|p| {
2263                let rel = p.rsplit_once('/').map(|(_, f)| f).unwrap_or(p);
2264                std::ffi::CString::new(rel).unwrap_or_default()
2265            })
2266            .collect();
2267        let c_dir = std::ffi::CString::new(dir.as_str()).unwrap_or_default();
2268        let dfd = unsafe { libc::open(c_dir.as_ptr(), libc::O_RDONLY | libc::O_DIRECTORY) };
2269        (names, dfd)
2270    } else {
2271        let paths: Vec<std::ffi::CString> = filenames.iter()
2272            .map(|p| std::ffi::CString::new(p.as_str()).unwrap_or_default())
2273            .collect();
2274        (paths, -1)
2275    };
2276
2277    #[cfg(target_os = "linux")]
2278    let noatime_flag: libc::c_int = libc::O_NOATIME;
2279    #[cfg(not(target_os = "linux"))]
2280    let noatime_flag: libc::c_int = 0;
2281
2282    let open_file = |i: usize| -> i32 {
2283        unsafe {
2284            let flags = libc::O_RDONLY | noatime_flag;
2285            let f = if dir_fd >= 0 {
2286                libc::openat(dir_fd, c_names[i].as_ptr(), flags)
2287            } else {
2288                libc::open(c_names[i].as_ptr(), flags)
2289            };
2290            if f >= 0 { f } else if dir_fd >= 0 {
2291                libc::openat(dir_fd, c_names[i].as_ptr(), libc::O_RDONLY)
2292            } else {
2293                libc::open(c_names[i].as_ptr(), libc::O_RDONLY)
2294            }
2295        }
2296    };
2297
2298    // Phase 1: Parallel fstatat — get file sizes without opening files.
2299    let sizes: Vec<i64> = (0..n).into_par_iter()
2300        .map(|i| {
2301            let mut stat_buf: libc::stat = unsafe { std::mem::zeroed() };
2302            let rc = unsafe {
2303                if dir_fd >= 0 {
2304                    libc::fstatat(dir_fd, c_names[i].as_ptr(), &mut stat_buf, 0)
2305                } else {
2306                    libc::stat(c_names[i].as_ptr(), &mut stat_buf)
2307                }
2308            };
2309            if rc == 0 { stat_buf.st_size as i64 } else { -1 }
2310        })
2311        .collect();
2312
2313    // Phase 2: Group by (file_size, extension) using sort (faster than HashMap).
2314    let mut sorted_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
2315    for i in 0..n {
2316        if sizes[i] >= 0 {
2317            let ext_id: u64 = match exts[i].as_bytes() {
2318                b"mp3" | b"MP3" => 1,
2319                b"flac" | b"FLAC" => 2,
2320                b"ogg" | b"OGG" => 3,
2321                b"mp4" | b"MP4" | b"m4a" | b"M4A" | b"m4b" | b"M4B" => 4,
2322                _ => 0,
2323            };
2324            sorted_indices.push(((sizes[i] as u64) << 4 | ext_id, i));
2325        }
2326    }
2327    sorted_indices.sort_unstable_by_key(|&(k, _)| k);
2328
2329    let mut reps: Vec<usize> = Vec::new();
2330    let mut group_bounds: Vec<usize> = Vec::new();
2331    {
2332        let mut i = 0;
2333        while i < sorted_indices.len() {
2334            let key = sorted_indices[i].0;
2335            reps.push(sorted_indices[i].1);
2336            group_bounds.push(i);
2337            while i < sorted_indices.len() && sorted_indices[i].0 == key { i += 1; }
2338        }
2339        group_bounds.push(sorted_indices.len());
2340    }
2341
2342    // Phase 3: Parse representatives in parallel (FLAC uses 4KB prefix with kept-open fd).
2343    let parsed: HashMap<usize, Arc<PreSerializedFile>> = reps.par_iter().copied()
2344        .filter_map(|i| {
2345            let fd = open_file(i);
2346            if fd < 0 { return None; }
2347            let file_len = sizes[i] as usize;
2348            let ext = exts[i];
2349
2350            let pf = if ext.eq_ignore_ascii_case("flac") && file_len > 4096 {
2351                let mut buf = vec![0u8; 4096];
2352                let nr = unsafe {
2353                    libc::pread(fd, buf.as_mut_ptr() as *mut libc::c_void, 4096, 0)
2354                };
2355                if nr <= 0 { unsafe { libc::close(fd); } return None; }
2356                buf.truncate(nr as usize);
2357                if let Some(pf) = parse_flac_batch(&buf, file_len) {
2358                    if pf.lazy_vc.is_some() {
2359                        unsafe { libc::close(fd); }
2360                        Some(pf)
2361                    } else {
2362                        let mut data = vec![0u8; file_len];
2363                        let nr2 = unsafe {
2364                            libc::pread(fd, data.as_mut_ptr() as *mut libc::c_void, file_len, 0)
2365                        };
2366                        unsafe { libc::close(fd); }
2367                        if nr2 <= 0 { return None; }
2368                        data.truncate(nr2 as usize);
2369                        parse_flac_batch(&data, file_len)
2370                    }
2371                } else {
2372                    let mut data = vec![0u8; file_len];
2373                    let nr2 = unsafe {
2374                        libc::pread(fd, data.as_mut_ptr() as *mut libc::c_void, file_len, 0)
2375                    };
2376                    unsafe { libc::close(fd); }
2377                    if nr2 <= 0 { return None; }
2378                    data.truncate(nr2 as usize);
2379                    parse_flac_batch(&data, file_len)
2380                }
2381            } else {
2382                let mut data = vec![0u8; file_len];
2383                let nr = unsafe {
2384                    libc::pread(fd, data.as_mut_ptr() as *mut libc::c_void, file_len, 0)
2385                };
2386                unsafe { libc::close(fd); }
2387                if nr <= 0 { return None; }
2388                data.truncate(nr as usize);
2389                parse_and_serialize(&data, &filenames[i])
2390            }?;
2391
2392            Some((i, Arc::new(pf)))
2393        })
2394        .collect();
2395
2396    // Close directory fd
2397    if dir_fd >= 0 { unsafe { libc::close(dir_fd); } }
2398
2399    // Phase 4: Assign — each file gets its group representative's parsed result.
2400    let mut results: Vec<(usize, Arc<PreSerializedFile>)> = Vec::with_capacity(n);
2401    for (g, &rep) in reps.iter().enumerate() {
2402        if let Some(pf) = parsed.get(&rep) {
2403            for j in group_bounds[g]..group_bounds[g + 1] {
2404                results.push((sorted_indices[j].1, Arc::clone(pf)));
2405            }
2406        }
2407    }
2408    results
2409}
2410
/// Batch I/O helper (non-Unix): portable fallback built on `std::fs`, with
/// parsing done in parallel through rayon.
///
/// `exts[i]` must be the extension of `filenames[i]`. The return value pairs
/// each successfully handled input index with its parsed record.
///
/// Files are grouped by (size, extension bucket) and only one representative
/// per group is read and parsed; every member of the group shares that
/// representative's result. Files whose metadata cannot be read, or that
/// cannot be opened, read or parsed, are absent from the output.
#[cfg(not(unix))]
fn batch_open_io(filenames: &[String], exts: &[&str]) -> Vec<(usize, Arc<PreSerializedFile>)> {
    use rayon::prelude::*;
    use std::io::Read;

    /// Bytes of a FLAC file read first in the hope that the metadata fits.
    const FLAC_PROBE_LEN: usize = 4096;

    /// Dedup bucket for an extension, matched case-insensitively and covering
    /// the same extensions `parse_and_serialize` dispatches on; 0 = other.
    fn ext_group_id(ext: &str) -> u64 {
        const GROUPS: [(&str, u64); 7] = [
            ("mp3", 1),
            ("flac", 2),
            ("ogg", 3),
            ("mp4", 4),
            ("m4a", 4),
            ("m4b", 4),
            ("m4v", 4),
        ];
        GROUPS
            .iter()
            .find(|(name, _)| ext.eq_ignore_ascii_case(name))
            .map_or(0, |&(_, id)| id)
    }

    let n = filenames.len();
    if n == 0 { return Vec::new(); }

    // Phase 1: Get file sizes via std::fs::metadata (-1 = metadata unavailable).
    let sizes: Vec<i64> = (0..n).into_par_iter()
        .map(|i| std::fs::metadata(&filenames[i]).map(|m| m.len() as i64).unwrap_or(-1))
        .collect();

    // Phase 2: Group by (file_size, extension) using sort.
    // Key layout: size in the high bits, extension bucket in the low 4 bits.
    let mut sorted_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
    for i in 0..n {
        if sizes[i] >= 0 {
            sorted_indices.push(((sizes[i] as u64) << 4 | ext_group_id(exts[i]), i));
        }
    }
    sorted_indices.sort_unstable_by_key(|&(k, _)| k);

    // `reps[g]` is the representative of group g, which occupies
    // `sorted_indices[group_bounds[g]..group_bounds[g + 1]]`.
    let mut reps: Vec<usize> = Vec::new();
    let mut group_bounds: Vec<usize> = Vec::new();
    {
        let mut i = 0;
        while i < sorted_indices.len() {
            let key = sorted_indices[i].0;
            reps.push(sorted_indices[i].1);
            group_bounds.push(i);
            while i < sorted_indices.len() && sorted_indices[i].0 == key { i += 1; }
        }
        group_bounds.push(sorted_indices.len());
    }

    // Phase 3: Parse representatives in parallel using std::fs.
    let parsed: HashMap<usize, Arc<PreSerializedFile>> = reps.par_iter().copied()
        .filter_map(|i| {
            let file_len = sizes[i] as usize;
            let path = &filenames[i];

            let pf = if exts[i].eq_ignore_ascii_case("flac") && file_len > FLAC_PROBE_LEN {
                // Try the leading bytes first; large FLAC files often do not
                // need to be read in full.
                let mut file = std::fs::File::open(path).ok()?;
                let mut head = vec![0u8; FLAC_PROBE_LEN];
                file.read_exact(&mut head).ok()?;
                match parse_flac_batch(&head, file_len) {
                    // The prefix already yielded the lazy tag bytes.
                    Some(pf) if pf.lazy_vc.is_some() => Some(pf),
                    // Prefix insufficient or unparsable: retry on the whole file.
                    _ => {
                        let data = std::fs::read(path).ok()?;
                        parse_flac_batch(&data, file_len)
                    }
                }
            } else {
                let data = std::fs::read(path).ok()?;
                parse_and_serialize(&data, path)
            }?;

            Some((i, Arc::new(pf)))
        })
        .collect();

    // Phase 4: Assign results by group.
    let mut results: Vec<(usize, Arc<PreSerializedFile>)> = Vec::with_capacity(n);
    for (g, &rep) in reps.iter().enumerate() {
        if let Some(pf) = parsed.get(&rep) {
            for j in group_bounds[g]..group_bounds[g + 1] {
                results.push((sorted_indices[j].1, Arc::clone(pf)));
            }
        }
    }
    results
}
2494
2495/// Batch open: read and parse multiple files in parallel using rayon.
2496/// Returns a native Python dict (path → metadata dict) for zero-overhead iteration.
2497#[pyfunction]
2498fn batch_open(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
2499    let exts: Vec<&str> = filenames.iter()
2500        .map(|p| p.rsplit('.').next().unwrap_or(""))
2501        .collect();
2502
2503    let file_indices: Vec<(usize, Arc<PreSerializedFile>)> =
2504        py.detach(|| batch_open_io(&filenames, &exts));
2505
2506    // Build native Python dict with dict-level dedup (one materialization per unique file)
2507    unsafe {
2508        let result_ptr = pyo3::ffi::PyDict_New();
2509        if result_ptr.is_null() {
2510            return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
2511        }
2512
2513        let mut mat_cache: HashMap<usize, *mut pyo3::ffi::PyObject> = HashMap::new();
2514
2515        for &(idx, ref pf) in &file_indices {
2516            let cache_key = Arc::as_ptr(pf) as usize;
2517            let dict_ptr = if let Some(&cached) = mat_cache.get(&cache_key) {
2518                cached
2519            } else {
2520                let d = preserialized_to_py_dict(py, pf)?.into_ptr();
2521                mat_cache.insert(cache_key, d);
2522                d
2523            };
2524
2525            let path = &filenames[idx];
2526            let path_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
2527                path.as_ptr() as *const std::ffi::c_char, path.len() as pyo3::ffi::Py_ssize_t);
2528            pyo3::ffi::PyDict_SetItem(result_ptr, path_ptr, dict_ptr);
2529            pyo3::ffi::Py_DECREF(path_ptr);
2530        }
2531
2532        // Release materialization cache references
2533        for (_, ptr) in &mat_cache {
2534            pyo3::ffi::Py_DECREF(*ptr);
2535        }
2536
2537        Ok(Bound::from_owned_ptr(py, result_ptr).unbind())
2538    }
2539}
2540
2541/// Fast batch read: parallel I/O + parse, then raw FFI dict creation.
2542/// Returns a Python dict mapping path → flat dict (same format as _fast_read).
2543/// Faster than batch_open for scenarios where all results are accessed.
2544#[pyfunction]
2545fn _fast_batch_read(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
2546    use rayon::prelude::*;
2547    use std::sync::Arc;
2548
2549    // Phase 1: Parallel read + parse (outside GIL)
2550    // Content-based dedup: first 64 bytes as fingerprint. Arc avoids cloning parsed data.
2551    let parsed: Vec<(String, Arc<PreSerializedFile>)> = py.detach(|| {
2552        let n = filenames.len();
2553        if n == 0 { return Vec::new(); }
2554
2555        let dedup: std::sync::RwLock<HashMap<[u8; 64], Arc<PreSerializedFile>>> =
2556            std::sync::RwLock::new(HashMap::with_capacity(n / 4));
2557
2558        (0..n).into_par_iter()
2559            .with_min_len(4)
2560            .filter_map(|i| {
2561                use std::io::{Read, Seek};
2562                let path = &filenames[i];
2563                let mut file = std::fs::File::open(path).ok()?;
2564
2565                let mut header = [0u8; 64];
2566                let hdr_n = file.read(&mut header).ok()?;
2567                if hdr_n == 0 { return None; }
2568
2569                {
2570                    if let Ok(cache) = dedup.read() {
2571                        if let Some(pf) = cache.get(&header) {
2572                            return Some((path.clone(), Arc::clone(pf)));
2573                        }
2574                    }
2575                }
2576
2577                let file_len = file.metadata().ok()?.len() as usize;
2578                file.seek(std::io::SeekFrom::Start(0)).ok()?;
2579                let pf = if file_len > 32768 {
2580                    let mmap = unsafe { memmap2::Mmap::map(&file).ok()? };
2581                    parse_and_serialize(&mmap, path)
2582                } else {
2583                    let mut data = Vec::with_capacity(file_len);
2584                    file.read_to_end(&mut data).ok()?;
2585                    parse_and_serialize(&data, path)
2586                }?;
2587
2588                let arc = Arc::new(pf);
2589                if let Ok(mut cache) = dedup.write() {
2590                    cache.insert(header, Arc::clone(&arc));
2591                }
2592
2593                Some((path.clone(), arc))
2594            })
2595            .collect()
2596    });
2597
2598    // Phase 2: Serial dict creation using raw FFI (under GIL)
2599    // Template cache: for duplicate files (same Arc), create dict once and PyDict_Copy.
2600    unsafe {
2601        let result_ptr = pyo3::ffi::PyDict_New();
2602        if result_ptr.is_null() {
2603            return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
2604        }
2605
2606        let mut template_cache: HashMap<*const PreSerializedFile, *mut pyo3::ffi::PyObject> =
2607            HashMap::with_capacity(parsed.len() / 4 + 1);
2608
2609        for (path, pf) in &parsed {
2610            let arc_ptr = Arc::as_ptr(pf);
2611
2612            let dict_ptr = if let Some(&template) = template_cache.get(&arc_ptr) {
2613                // Fast path: shallow copy of template dict (~50ns vs ~1.5μs for full creation)
2614                pyo3::ffi::PyDict_Copy(template)
2615            } else {
2616                // Slow path: create new dict from scratch
2617                let dp = pyo3::ffi::PyDict_New();
2618                if dp.is_null() { continue; }
2619
2620                // Info fields via raw FFI
2621                set_dict_f64(dp, pyo3::intern!(py, "length").as_ptr(), pf.length);
2622                set_dict_u32(dp, pyo3::intern!(py, "sample_rate").as_ptr(), pf.sample_rate);
2623                set_dict_u32(dp, pyo3::intern!(py, "channels").as_ptr(), pf.channels);
2624                if let Some(br) = pf.bitrate {
2625                    set_dict_u32(dp, pyo3::intern!(py, "bitrate").as_ptr(), br);
2626                }
2627
2628                // Extra metadata
2629                for (key, value) in &pf.extra {
2630                    let py_val = batch_value_to_py(py, value)?;
2631                    let key_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
2632                        key.as_ptr() as *const std::ffi::c_char, key.len() as pyo3::ffi::Py_ssize_t);
2633                    pyo3::ffi::PyDict_SetItem(dp, key_ptr, py_val.as_ptr());
2634                    pyo3::ffi::Py_DECREF(key_ptr);
2635                }
2636
2637                // Tags: direct VC→FFI path for lazy VC, standard path otherwise
2638                if pf.tags.is_empty() {
2639                    if let Some(ref vc_bytes) = pf.lazy_vc {
2640                        parse_vc_to_ffi_dict(vc_bytes, dp);
2641                    }
2642                } else {
2643                    for (key, value) in &pf.tags {
2644                        let py_val = batch_value_to_py_ffi(py, value);
2645                        if py_val.is_null() { continue; }
2646                        let key_ptr = intern_tag_key(key.as_bytes());
2647                        if key_ptr.is_null() {
2648                            pyo3::ffi::Py_DECREF(py_val);
2649                            continue;
2650                        }
2651                        pyo3::ffi::PyDict_SetItem(dp, key_ptr, py_val);
2652                        pyo3::ffi::Py_DECREF(py_val);
2653                        pyo3::ffi::Py_DECREF(key_ptr);
2654                    }
2655                }
2656
2657                // Cache template (extra ref for cache ownership)
2658                pyo3::ffi::Py_INCREF(dp);
2659                template_cache.insert(arc_ptr, dp);
2660                dp
2661            };
2662
2663            if dict_ptr.is_null() { continue; }
2664
2665            // Insert into result dict: path → flat dict
2666            let path_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
2667                path.as_ptr() as *const std::ffi::c_char, path.len() as pyo3::ffi::Py_ssize_t);
2668            pyo3::ffi::PyDict_SetItem(result_ptr, path_ptr, dict_ptr);
2669            pyo3::ffi::Py_DECREF(path_ptr);
2670            pyo3::ffi::Py_DECREF(dict_ptr);
2671        }
2672
2673        // Release template cache refs
2674        for (_, ptr) in &template_cache {
2675            pyo3::ffi::Py_DECREF(*ptr);
2676        }
2677
2678        Ok(Bound::from_owned_ptr(py, result_ptr).unbind())
2679    }
2680}
2681
2682/// Diagnostic version: measures I/O vs parse vs parallel overhead.
2683#[pyfunction]
2684fn batch_diag(py: Python<'_>, filenames: Vec<String>) -> PyResult<String> {
2685    use rayon::prelude::*;
2686    use std::time::Instant;
2687
2688    let result = py.detach(|| {
2689        let n = filenames.len();
2690
2691        // Phase 1: Sequential file reads (no fstat)
2692        let t1 = Instant::now();
2693        let file_data: Vec<(String, Vec<u8>)> = filenames.iter()
2694            .filter_map(|p| std::fs::read(p).ok().map(|d| (p.clone(), d)))
2695            .collect();
2696        let read_seq_us = t1.elapsed().as_micros();
2697
2698        // Phase 2: Sequential parse (no I/O)
2699        let t2 = Instant::now();
2700        let _: Vec<_> = file_data.iter()
2701            .filter_map(|(p, d)| parse_and_serialize(d, p).map(|pf| (p.clone(), pf)))
2702            .collect();
2703        let parse_seq_us = t2.elapsed().as_micros();
2704
2705        // Phase 3: Parallel parse (no I/O)
2706        let t3 = Instant::now();
2707        let _: Vec<_> = file_data.par_iter()
2708            .filter_map(|(p, d)| parse_and_serialize(d, p).map(|pf| (p.clone(), pf)))
2709            .collect();
2710        let parse_par_us = t3.elapsed().as_micros();
2711
2712        // Phase 4: Parallel read+parse (current approach)
2713        let t4 = Instant::now();
2714        let _: Vec<_> = filenames.par_iter().filter_map(|path| {
2715            let data = std::fs::read(path).ok()?;
2716            let pf = parse_and_serialize(&data, path)?;
2717            Some((path.clone(), pf))
2718        }).collect();
2719        let full_par_us = t4.elapsed().as_micros();
2720
2721        format!(
2722            "n={} | seq_read={}µs seq_parse={}µs par_parse={}µs full_par={}µs | \
2723             parse_par_speedup={:.1}x io_fraction={:.0}%",
2724            n, read_seq_us, parse_seq_us, parse_par_us, full_par_us,
2725            parse_seq_us as f64 / parse_par_us.max(1) as f64,
2726            read_seq_us as f64 / (read_seq_us + parse_seq_us).max(1) as f64 * 100.0,
2727        )
2728    });
2729
2730    Ok(result)
2731}
2732
2733/// Auto-detect file format and open.
2734#[pyfunction]
2735#[pyo3(signature = (filename, easy=false))]
2736fn file_open(py: Python<'_>, filename: &str, easy: bool) -> PyResult<Py<PyAny>> {
2737    let _ = easy;
2738
2739    let data = read_cached(filename)
2740        .map_err(|e| PyIOError::new_err(format!("Cannot open file: {}", e)))?;
2741
2742    // Fast path: extension-based detection (avoids scoring overhead)
2743    let ext = filename.rsplit('.').next().unwrap_or("");
2744    if ext.eq_ignore_ascii_case("flac") {
2745        let f = PyFLAC::from_data(py, &data, filename)?;
2746        return Ok(f.into_pyobject(py)?.into_any().unbind());
2747    }
2748    if ext.eq_ignore_ascii_case("ogg") {
2749        let f = PyOggVorbis::from_data(py, &data, filename)?;
2750        return Ok(f.into_pyobject(py)?.into_any().unbind());
2751    }
2752    if ext.eq_ignore_ascii_case("mp3") {
2753        let f = PyMP3::from_data(py, &data, filename)?;
2754        return Ok(f.into_pyobject(py)?.into_any().unbind());
2755    }
2756    if ext.eq_ignore_ascii_case("m4a") || ext.eq_ignore_ascii_case("m4b")
2757        || ext.eq_ignore_ascii_case("mp4") || ext.eq_ignore_ascii_case("m4v") {
2758        let f = PyMP4::from_data(py, &data, filename)?;
2759        return Ok(f.into_pyobject(py)?.into_any().unbind());
2760    }
2761
2762    // Fallback: score-based detection
2763    let mp3_score = mp3::MP3File::score(filename, &data);
2764    let flac_score = flac::FLACFile::score(filename, &data);
2765    let ogg_score = ogg::OggVorbisFile::score(filename, &data);
2766    let mp4_score = mp4::MP4File::score(filename, &data);
2767
2768    let max_score = mp3_score.max(flac_score).max(ogg_score).max(mp4_score);
2769
2770    if max_score == 0 {
2771        return Err(PyValueError::new_err(format!(
2772            "Unable to detect format for: {}",
2773            filename
2774        )));
2775    }
2776
2777    if max_score == flac_score {
2778        let f = PyFLAC::from_data(py, &data, filename)?;
2779        Ok(f.into_pyobject(py)?.into_any().unbind())
2780    } else if max_score == ogg_score {
2781        let f = PyOggVorbis::from_data(py, &data, filename)?;
2782        Ok(f.into_pyobject(py)?.into_any().unbind())
2783    } else if max_score == mp4_score {
2784        let f = PyMP4::from_data(py, &data, filename)?;
2785        Ok(f.into_pyobject(py)?.into_any().unbind())
2786    } else {
2787        let f = PyMP3::from_data(py, &data, filename)?;
2788        Ok(f.into_pyobject(py)?.into_any().unbind())
2789    }
2790}
2791
2792/// Global result cache — stores parsed PyDict per file path.
2793/// On warm hit, returns a shallow copy (~200ns vs ~1700ns for re-parsing).
2794static RESULT_CACHE: OnceLock<RwLock<HashMap<String, Py<PyDict>>>> = OnceLock::new();
2795
2796fn get_result_cache() -> &'static RwLock<HashMap<String, Py<PyDict>>> {
2797    RESULT_CACHE.get_or_init(|| RwLock::new(HashMap::with_capacity(256)))
2798}
2799
2800/// Template cache — stores pre-built PyDicts per path.
2801/// NOT cleared by clear_cache() — only invalidated when files are modified (save/delete).
2802/// Cold reads return PyDict_Copy of the template (~200ns) instead of re-parsing (~2-4μs).
2803static TEMPLATE_CACHE: OnceLock<RwLock<HashMap<String, Py<PyDict>>>> = OnceLock::new();
2804
2805fn get_template_cache() -> &'static RwLock<HashMap<String, Py<PyDict>>> {
2806    TEMPLATE_CACHE.get_or_init(|| RwLock::new(HashMap::with_capacity(256)))
2807}
2808
2809/// Clear the result cache, forcing subsequent reads to re-parse (but not re-read from disk).
2810/// File data cache persists for I/O amortization across repeated reads of unchanged files.
2811#[pyfunction]
2812fn clear_cache(_py: Python<'_>) {
2813    let cache = get_result_cache();
2814    let mut guard = cache.write().unwrap();
2815    guard.clear();
2816}
2817
2818/// Clear ALL caches including raw file data. Use when files on disk may have changed.
2819#[pyfunction]
2820fn clear_all_caches(_py: Python<'_>) {
2821    {
2822        let cache = get_file_cache();
2823        let mut guard = cache.write().unwrap();
2824        guard.clear();
2825    }
2826    {
2827        let cache = get_template_cache();
2828        let mut guard = cache.write().unwrap();
2829        guard.clear();
2830    }
2831    {
2832        let cache = get_result_cache();
2833        let mut guard = cache.write().unwrap();
2834        guard.clear();
2835    }
2836}
2837
2838/// Invalidate a single file from all caches (called after save/write operations).
2839fn invalidate_file(path: &str) {
2840    {
2841        let cache = get_file_cache();
2842        let mut guard = cache.write().unwrap();
2843        guard.remove(path);
2844    }
2845    {
2846        let cache = get_template_cache();
2847        let mut guard = cache.write().unwrap();
2848        guard.remove(path);
2849    }
2850    {
2851        let cache = get_result_cache();
2852        let mut guard = cache.write().unwrap();
2853        guard.remove(path);
2854    }
2855}
2856
2857/// Alias for batch_open (used by benchmark scripts).
2858#[pyfunction]
2859fn _rust_batch_open(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
2860    batch_open(py, filenames)
2861}
2862
2863// ---- Fast single-file read API ----
2864
2865/// Convert PreSerializedFile directly to a flat Python dict for _fast_read.
2866/// Reuses the batch parsing infrastructure (already optimized for zero-copy).
2867#[inline(always)]
2868fn preserialized_to_flat_dict(py: Python<'_>, pf: &PreSerializedFile, dict: &Bound<'_, PyDict>) -> PyResult<()> {
2869    dict.set_item(pyo3::intern!(py, "length"), pf.length)?;
2870    dict.set_item(pyo3::intern!(py, "sample_rate"), pf.sample_rate)?;
2871    dict.set_item(pyo3::intern!(py, "channels"), pf.channels)?;
2872    if let Some(br) = pf.bitrate {
2873        dict.set_item(pyo3::intern!(py, "bitrate"), br)?;
2874    }
2875    // Emit format-specific extra metadata
2876    for (key, value) in &pf.extra {
2877        dict.set_item(*key, batch_value_to_py(py, value)?)?;
2878    }
2879    // Materialize lazy VC tags on demand if needed
2880    let lazy_tags;
2881    let tags = if pf.tags.is_empty() {
2882        if let Some(ref vc_bytes) = pf.lazy_vc {
2883            lazy_tags = parse_vc_to_batch_tags(vc_bytes);
2884            &lazy_tags
2885        } else {
2886            &pf.tags
2887        }
2888    } else {
2889        &pf.tags
2890    };
2891    let mut keys: Vec<&str> = Vec::with_capacity(tags.len());
2892    for (key, value) in tags {
2893        dict.set_item(key.as_str(), batch_value_to_py(py, value)?)?;
2894        keys.push(key.as_str());
2895    }
2896    dict.set_item(pyo3::intern!(py, "_keys"), PyList::new(py, &keys)?)?;
2897    Ok(())
2898}
2899
2900// ---- Direct-to-PyDict for _fast_read (no PreSerializedFile intermediary) ----
2901
/// Compare two byte slices for equality, ignoring ASCII letter case.
/// Slices of different length are never equal; non-ASCII bytes must match exactly.
#[inline(always)]
#[allow(dead_code)]
fn eq_ascii_ci(a: &[u8], b: &[u8]) -> bool {
    a.eq_ignore_ascii_case(b)
}
2908
2909/// Create Python string from VC key bytes with ASCII lowercasing (matches mutagen).
2910/// Uses stack buffer — zero heap allocation.
2911#[inline(always)]
2912#[allow(dead_code)]
2913fn vc_key_to_py<'py>(py: Python<'py>, key_bytes: &[u8]) -> Option<Bound<'py, PyAny>> {
2914    if key_bytes.iter().all(|&b| !b.is_ascii_uppercase()) {
2915        std::str::from_utf8(key_bytes).ok()
2916            .and_then(|s| s.into_pyobject(py).ok())
2917            .map(|o| o.into_any())
2918    } else {
2919        let mut buf = [0u8; 128];
2920        let len = key_bytes.len().min(128);
2921        for i in 0..len {
2922            buf[i] = key_bytes[i].to_ascii_lowercase();
2923        }
2924        std::str::from_utf8(&buf[..len]).ok()
2925            .and_then(|s| s.into_pyobject(py).ok())
2926            .map(|o| o.into_any())
2927    }
2928}
2929
2930/// Parse VC data into groups of (key_bytes, values) with zero Rust String allocation.
2931/// First pass: group by key using byte slices. Second pass: create Python objects.
2932#[inline(always)]
2933#[allow(dead_code)]
2934fn parse_vc_grouped<'a>(data: &'a [u8]) -> Vec<(&'a [u8], Vec<&'a str>)> {
2935    if data.len() < 8 { return Vec::new(); }
2936    let mut pos = 0;
2937    let vendor_len = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
2938    pos += 4;
2939    if pos + vendor_len > data.len() { return Vec::new(); }
2940    pos += vendor_len;
2941    if pos + 4 > data.len() { return Vec::new(); }
2942    let count = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
2943    pos += 4;
2944
2945    let mut groups: Vec<(&[u8], Vec<&str>)> = Vec::with_capacity(count.min(32));
2946    for _ in 0..count {
2947        if pos + 4 > data.len() { break; }
2948        let clen = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
2949        pos += 4;
2950        if pos + clen > data.len() { break; }
2951        let raw = &data[pos..pos + clen];
2952        pos += clen;
2953
2954        let eq_pos = match memchr::memchr(b'=', raw) {
2955            Some(p) => p,
2956            None => continue,
2957        };
2958        let key = &raw[..eq_pos];
2959        let value = match std::str::from_utf8(&raw[eq_pos + 1..]) {
2960            Ok(s) => s,
2961            Err(_) => continue,
2962        };
2963
2964        if let Some(g) = groups.iter_mut().find(|(k, _)| eq_ascii_ci(k, key)) {
2965            g.1.push(value);
2966        } else {
2967            groups.push((key, vec![value]));
2968        }
2969    }
2970    groups
2971}
2972
2973/// Emit VC groups into PyDict using raw CPython FFI for maximum speed.
2974/// Avoids PyO3 wrapper overhead: ~20-30ns per C call vs ~50-80ns through safe API.
2975#[inline(always)]
2976#[allow(dead_code)]
2977fn emit_vc_groups_to_dict<'py>(
2978    _py: Python<'py>,
2979    groups: &[(&[u8], Vec<&str>)],
2980    dict: &Bound<'py, PyDict>,
2981    keys_out: &mut Vec<*mut pyo3::ffi::PyObject>,
2982) -> PyResult<()> {
2983    let dict_ptr = dict.as_ptr();
2984
2985    for (key_bytes, values) in groups {
2986        unsafe {
2987            // Create lowercase key using raw FFI (matches mutagen behavior)
2988            let key_ptr = if key_bytes.iter().all(|&b| !b.is_ascii_uppercase()) {
2989                match std::str::from_utf8(key_bytes) {
2990                    Ok(s) => pyo3::ffi::PyUnicode_FromStringAndSize(
2991                        s.as_ptr() as *const std::ffi::c_char, s.len() as pyo3::ffi::Py_ssize_t),
2992                    Err(_) => continue,
2993                }
2994            } else {
2995                let mut buf = [0u8; 128];
2996                let len = key_bytes.len().min(128);
2997                for i in 0..len { buf[i] = key_bytes[i].to_ascii_lowercase(); }
2998                match std::str::from_utf8(&buf[..len]) {
2999                    Ok(s) => pyo3::ffi::PyUnicode_FromStringAndSize(
3000                        s.as_ptr() as *const std::ffi::c_char, s.len() as pyo3::ffi::Py_ssize_t),
3001                    Err(_) => continue,
3002                }
3003            };
3004            if key_ptr.is_null() { continue; }
3005
3006            // Create list with value(s) using raw FFI
3007            let list_ptr = pyo3::ffi::PyList_New(values.len() as pyo3::ffi::Py_ssize_t);
3008            for (i, &value) in values.iter().enumerate() {
3009                let val_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
3010                    value.as_ptr() as *const std::ffi::c_char, value.len() as pyo3::ffi::Py_ssize_t);
3011                pyo3::ffi::PyList_SET_ITEM(list_ptr, i as pyo3::ffi::Py_ssize_t, val_ptr);
3012            }
3013
3014            // Set in dict (PyDict_SetItem borrows refs, increments internally)
3015            pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list_ptr);
3016            pyo3::ffi::Py_DECREF(list_ptr);
3017
3018            // Keep key_ptr for _keys list (refcount: 2 = us + dict)
3019            keys_out.push(key_ptr);
3020        }
3021    }
3022    Ok(())
3023}
3024
3025/// Build _keys list from raw key pointers and set in dict.
3026#[inline(always)]
3027fn set_keys_list(
3028    py: Python<'_>,
3029    dict: &Bound<'_, PyDict>,
3030    key_ptrs: Vec<*mut pyo3::ffi::PyObject>,
3031) -> PyResult<()> {
3032    unsafe {
3033        let keys_list = pyo3::ffi::PyList_New(key_ptrs.len() as pyo3::ffi::Py_ssize_t);
3034        for (i, key_ptr) in key_ptrs.iter().enumerate() {
3035            // PyList_SET_ITEM steals a reference, so we INCREF first.
3036            // After: refcount = 2 (dict + list), our original is "consumed" by SET_ITEM.
3037            pyo3::ffi::Py_INCREF(*key_ptr);
3038            pyo3::ffi::PyList_SET_ITEM(keys_list, i as pyo3::ffi::Py_ssize_t, *key_ptr);
3039        }
3040        // Set _keys in dict using raw FFI
3041        let keys_key = pyo3::intern!(py, "_keys");
3042        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), keys_key.as_ptr(), keys_list);
3043        pyo3::ffi::Py_DECREF(keys_list);
3044        // Now DECREF our original references (dict + _keys list still hold theirs)
3045        for key_ptr in key_ptrs {
3046            pyo3::ffi::Py_DECREF(key_ptr);
3047        }
3048    }
3049    Ok(())
3050}
3051
3052// ---- Interned tag key cache ----
// Caches Python string objects for short tag keys (at most 8 bytes), such as 4-byte
// ID3 frame IDs and short Vorbis comment keys.
// Avoids a PyUnicode_FromStringAndSize call per tag on repeated file reads.
// The cache is thread-local and stores raw object pointers, so every caller
// (e.g. _fast_batch_read and the ID3 frame walkers) must hold the GIL.
3056
3057use std::cell::RefCell;
3058
3059thread_local! {
3060    static TAG_KEY_INTERN: RefCell<HashMap<[u8; 8], *mut pyo3::ffi::PyObject>> = RefCell::new(HashMap::with_capacity(64));
3061}
3062
3063/// Get or create an interned Python string for a tag key.
3064/// Returns a NEW reference (caller must DECREF or transfer ownership).
3065#[inline(always)]
3066unsafe fn intern_tag_key(key: &[u8]) -> *mut pyo3::ffi::PyObject {
3067    if key.len() > 8 {
3068        // Long keys: create directly, don't cache
3069        return pyo3::ffi::PyUnicode_FromStringAndSize(
3070            key.as_ptr() as *const std::ffi::c_char,
3071            key.len() as pyo3::ffi::Py_ssize_t);
3072    }
3073    let mut buf = [0u8; 8];
3074    buf[..key.len()].copy_from_slice(key);
3075
3076    TAG_KEY_INTERN.with(|cache| {
3077        let mut cache = cache.borrow_mut();
3078        if let Some(&ptr) = cache.get(&buf) {
3079            pyo3::ffi::Py_INCREF(ptr);
3080            ptr
3081        } else {
3082            let ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
3083                key.as_ptr() as *const std::ffi::c_char,
3084                key.len() as pyo3::ffi::Py_ssize_t);
3085            if !ptr.is_null() {
3086                pyo3::ffi::Py_INCREF(ptr); // one ref for cache, one for caller
3087                cache.insert(buf, ptr);
3088            }
3089            ptr
3090        }
3091    })
3092}
3093
3094// ---- Raw FFI helpers for fast dict population ----
3095
3096#[inline(always)]
3097unsafe fn set_dict_f64(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: f64) {
3098    let v = pyo3::ffi::PyFloat_FromDouble(val);
3099    pyo3::ffi::PyDict_SetItem(dict, key, v);
3100    pyo3::ffi::Py_DECREF(v);
3101}
3102
3103#[inline(always)]
3104unsafe fn set_dict_u32(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: u32) {
3105    let v = pyo3::ffi::PyLong_FromUnsignedLong(val as std::ffi::c_ulong);
3106    pyo3::ffi::PyDict_SetItem(dict, key, v);
3107    pyo3::ffi::Py_DECREF(v);
3108}
3109
3110#[inline(always)]
3111unsafe fn set_dict_i64(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: i64) {
3112    let v = pyo3::ffi::PyLong_FromLongLong(val);
3113    pyo3::ffi::PyDict_SetItem(dict, key, v);
3114    pyo3::ffi::Py_DECREF(v);
3115}
3116
3117#[inline(always)]
3118unsafe fn set_dict_bool(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: bool) {
3119    let v = if val { pyo3::ffi::Py_True() } else { pyo3::ffi::Py_False() };
3120    pyo3::ffi::Py_INCREF(v);
3121    pyo3::ffi::PyDict_SetItem(dict, key, v);
3122    pyo3::ffi::Py_DECREF(v);
3123}
3124
3125#[inline(always)]
3126#[allow(dead_code)]
3127unsafe fn set_dict_str(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: &str) {
3128    let v = pyo3::ffi::PyUnicode_FromStringAndSize(
3129        val.as_ptr() as *const std::ffi::c_char, val.len() as pyo3::ffi::Py_ssize_t);
3130    pyo3::ffi::PyDict_SetItem(dict, key, v);
3131    pyo3::ffi::Py_DECREF(v);
3132}
3133
3134/// Try to convert raw ID3 text frame data directly to a Python string.
3135/// Returns Some(new_ref) for single-value UTF-8/Latin-1 text frames.
3136/// Returns None for multi-value, UTF-16, or invalid data (caller falls back to full decode).
3137#[inline(always)]
3138unsafe fn try_text_frame_to_py(data: &[u8]) -> Option<*mut pyo3::ffi::PyObject> {
3139    if data.is_empty() { return None; }
3140    let enc = data[0];
3141    let text_data = &data[1..];
3142    // Trim trailing nulls
3143    let mut len = text_data.len();
3144    while len > 0 && text_data[len - 1] == 0 { len -= 1; }
3145    if len == 0 { return None; }
3146    let text = &text_data[..len];
3147    match enc {
3148        3 | 0 => {
3149            // UTF-8 (3) or Latin-1 (0)
3150            let has_high = enc == 0 && text.iter().any(|&b| b >= 128);
3151            let make_str = |s: &[u8]| -> *mut pyo3::ffi::PyObject {
3152                if has_high {
3153                    pyo3::ffi::PyUnicode_DecodeLatin1(
3154                        s.as_ptr() as *const std::ffi::c_char,
3155                        s.len() as pyo3::ffi::Py_ssize_t,
3156                        std::ptr::null())
3157                } else {
3158                    pyo3::ffi::PyUnicode_FromStringAndSize(
3159                        s.as_ptr() as *const std::ffi::c_char,
3160                        s.len() as pyo3::ffi::Py_ssize_t)
3161                }
3162            };
3163            if enc == 3 && std::str::from_utf8(text).is_err() { return None; }
3164            let ptr = make_str(text);
3165            if ptr.is_null() { None } else { Some(ptr) }
3166        }
3167        _ => None // UTF-16: fall back to full decode
3168    }
3169}
3170
3171/// Resolve a single TCON genre reference like "(3)", "35", or "(3)Dance" to a name.
3172fn resolve_tcon_genre_single(text: &str) -> String {
3173    let genres = crate::id3::specs::GENRES;
3174    // Handle "(N)" prefix format
3175    if text.starts_with('(') {
3176        if let Some(end) = text.find(')') {
3177            if let Ok(n) = text[1..end].parse::<usize>() {
3178                let suffix = &text[end + 1..];
3179                if !suffix.is_empty() {
3180                    return suffix.to_string(); // "(3)Dance" → "Dance"
3181                }
3182                if n < genres.len() {
3183                    return genres[n].to_string(); // "(3)" → "Dance"
3184                }
3185            }
3186        }
3187    }
3188    // Handle bare number "35"
3189    if let Ok(n) = text.parse::<usize>() {
3190        if n < genres.len() {
3191            return genres[n].to_string();
3192        }
3193    }
3194    text.to_string()
3195}
3196
3197/// Resolve TCON genre references, handling null-separated multi-value text.
3198/// Returns the first resolved genre value.
3199fn resolve_tcon_genre(text: &str) -> String {
3200    // Handle null-separated multi-value TCON (ID3v2.4 uses \0 as separator)
3201    if text.contains('\0') {
3202        for part in text.split('\0') {
3203            if !part.is_empty() {
3204                return resolve_tcon_genre_single(part);
3205            }
3206        }
3207    }
3208    resolve_tcon_genre_single(text)
3209}
3210
3211/// Walk v2.2 ID3 frames and emit directly to PyDict.
3212#[inline(always)]
3213fn fast_walk_v22_frames(
3214    py: Python<'_>, tag_bytes: &[u8], offset: &mut usize,
3215    dict_ptr: *mut pyo3::ffi::PyObject, key_ptrs: &mut Vec<*mut pyo3::ffi::PyObject>,
3216) {
3217    while *offset + 6 <= tag_bytes.len() {
3218        if tag_bytes[*offset] == 0 { break; }
3219        let id_bytes = &tag_bytes[*offset..*offset+3];
3220        if !id_bytes.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) {
3221            *offset += 1;
3222            while *offset + 6 <= tag_bytes.len() {
3223                if tag_bytes[*offset] == 0 { break; }
3224                let next_id = &tag_bytes[*offset..*offset+3];
3225                if next_id.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) { break; }
3226                *offset += 1;
3227            }
3228            continue;
3229        }
3230        let size = ((tag_bytes[*offset+3] as usize) << 16)
3231            | ((tag_bytes[*offset+4] as usize) << 8)
3232            | (tag_bytes[*offset+5] as usize);
3233        *offset += 6;
3234        if size == 0 { continue; }
3235        if *offset + size > tag_bytes.len() { break; }
3236        let frame_data = &tag_bytes[*offset..*offset+size];
3237        *offset += size;
3238
3239        if id_bytes == b"PIC" {
3240            if let Ok(frame) = id3::frames::parse_v22_picture_frame(frame_data) {
3241                let key = frame.hash_key();
3242                let py_val = frame_to_py(py, &frame);
3243                unsafe {
3244                    let key_ptr = intern_tag_key(key.as_str().as_bytes());
3245                    pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
3246                    key_ptrs.push(key_ptr);
3247                }
3248            }
3249            continue;
3250        }
3251
3252        let id_str = std::str::from_utf8(id_bytes).unwrap_or("XXX");
3253        let v24_id = match id3::frames::convert_v22_frame_id(id_str) {
3254            Some(id) => id,
3255            None => continue,
3256        };
3257
3258        // Fast text path with merge, TCON resolution, TYER→TDRC
3259        if v24_id.as_bytes()[0] == b'T' && v24_id != "TXXX" && v24_id != "TIPL" && v24_id != "TMCL" && v24_id != "IPLS" {
3260            unsafe {
3261                if let Some(py_ptr) = try_text_frame_to_py(frame_data) {
3262                    let v24_bytes = v24_id.as_bytes();
3263                    // TCON: resolve genre references
3264                    let final_ptr = if v24_bytes == b"TCON" {
3265                        let py_str = pyo3::ffi::PyUnicode_AsUTF8(py_ptr);
3266                        if !py_str.is_null() {
3267                            let s = std::ffi::CStr::from_ptr(py_str).to_string_lossy();
3268                            let resolved = resolve_tcon_genre(&s);
3269                            let r = resolved.as_bytes();
3270                            let new_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
3271                                r.as_ptr() as *const std::ffi::c_char,
3272                                r.len() as pyo3::ffi::Py_ssize_t);
3273                            pyo3::ffi::Py_DECREF(py_ptr);
3274                            new_ptr
3275                        } else { py_ptr }
3276                    } else { py_ptr };
3277                    if final_ptr.is_null() { continue; }
3278                    // TYER normalization: store as TDRC only (mutagen normalizes TYER to TDRC)
3279                    let is_tyer = v24_bytes == b"TYER";
3280                    let key_ptr = if is_tyer {
3281                        let tdrc_key = intern_tag_key(b"TDRC");
3282                        // If TDRC already exists, skip TYER entirely
3283                        if pyo3::ffi::PyDict_Contains(dict_ptr, tdrc_key) != 0 {
3284                            pyo3::ffi::Py_DECREF(tdrc_key);
3285                            pyo3::ffi::Py_DECREF(final_ptr);
3286                            continue;
3287                        }
3288                        tdrc_key
3289                    } else {
3290                        intern_tag_key(v24_bytes)
3291                    };
3292                    let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
3293                    if existing.is_null() {
3294                        pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, final_ptr);
3295                        key_ptrs.push(key_ptr);
3296                    } else {
3297                        if pyo3::ffi::PyList_Check(existing) != 0 {
3298                            pyo3::ffi::PyList_Append(existing, final_ptr);
3299                        } else {
3300                            let list = pyo3::ffi::PyList_New(2);
3301                            pyo3::ffi::Py_INCREF(existing);
3302                            pyo3::ffi::PyList_SET_ITEM(list, 0, existing);
3303                            pyo3::ffi::Py_INCREF(final_ptr);
3304                            pyo3::ffi::PyList_SET_ITEM(list, 1, final_ptr);
3305                            pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list);
3306                            pyo3::ffi::Py_DECREF(list);
3307                        }
3308                        pyo3::ffi::Py_DECREF(key_ptr);
3309                    }
3310                    pyo3::ffi::Py_DECREF(final_ptr);
3311                    continue;
3312                }
3313            }
3314        }
3315
3316        // Full decode fallback (skip if key exists)
3317        if let Ok(frame) = id3::frames::parse_frame(v24_id, frame_data) {
3318            let key = frame.hash_key();
3319            let py_val = frame_to_py(py, &frame);
3320            unsafe {
3321                let key_ptr = intern_tag_key(key.as_str().as_bytes());
3322                if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
3323                    pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
3324                    key_ptrs.push(key_ptr);
3325                } else {
3326                    pyo3::ffi::Py_DECREF(key_ptr);
3327                }
3328            }
3329        }
3330    }
3331}
3332
3333/// Walk v2.3/v2.4 ID3 frames and emit directly to PyDict.
3334#[inline(always)]
3335fn fast_walk_v2x_frames(
3336    py: Python<'_>, tag_bytes: &[u8], offset: &mut usize, version: u8, bpi: u8,
3337    dict_ptr: *mut pyo3::ffi::PyObject, key_ptrs: &mut Vec<*mut pyo3::ffi::PyObject>,
3338) {
3339    while *offset + 10 <= tag_bytes.len() {
3340        if tag_bytes[*offset] == 0 { break; }
3341        let id_bytes = &tag_bytes[*offset..*offset+4];
3342        if !id_bytes.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) {
3343            // Scan forward for next valid frame header
3344            *offset += 1;
3345            while *offset + 10 <= tag_bytes.len() {
3346                if tag_bytes[*offset] == 0 { break; }
3347                let next_id = &tag_bytes[*offset..*offset+4];
3348                if next_id.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) { break; }
3349                *offset += 1;
3350            }
3351            continue;
3352        }
3353        let size = id3::header::BitPaddedInt::decode(&tag_bytes[*offset+4..*offset+8], bpi) as usize;
3354        let flags = u16::from_be_bytes([tag_bytes[*offset+8], tag_bytes[*offset+9]]);
3355        *offset += 10;
3356        if size == 0 { continue; }
3357        if *offset + size > tag_bytes.len() { break; }
3358
3359        let (compressed, encrypted, unsynchronised, has_data_length) = if version == 4 {
3360            (flags & 0x0008 != 0, flags & 0x0004 != 0, flags & 0x0002 != 0, flags & 0x0001 != 0)
3361        } else {
3362            (flags & 0x0080 != 0, flags & 0x0040 != 0, false, flags & 0x0080 != 0)
3363        };
3364
3365        let id_str = std::str::from_utf8(id_bytes).unwrap_or("XXXX");
3366
3367        if !encrypted && !compressed && !unsynchronised && !has_data_length {
3368            // Fast path: no frame flags
3369            let frame_data = &tag_bytes[*offset..*offset+size];
3370            *offset += size;
3371
3372            // Simple text frames: zero-alloc direct to Python
3373            if id_bytes[0] == b'T' && id_str != "TXXX" && id_str != "TIPL" && id_str != "TMCL" && id_str != "IPLS" {
3374                unsafe {
3375                    if let Some(py_ptr) = try_text_frame_to_py(frame_data) {
3376                        let final_ptr = if id_bytes == b"TCON" {
3377                            let py_str = pyo3::ffi::PyUnicode_AsUTF8(py_ptr);
3378                            if !py_str.is_null() {
3379                                let s = std::ffi::CStr::from_ptr(py_str).to_string_lossy();
3380                                let resolved = resolve_tcon_genre(&s);
3381                                let r = resolved.as_bytes();
3382                                let new_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
3383                                    r.as_ptr() as *const std::ffi::c_char,
3384                                    r.len() as pyo3::ffi::Py_ssize_t);
3385                                pyo3::ffi::Py_DECREF(py_ptr);
3386                                new_ptr
3387                            } else { py_ptr }
3388                        } else { py_ptr };
3389                        if final_ptr.is_null() { continue; }
3390                        let is_tyer = id_bytes == b"TYER";
3391                        let key_ptr = if is_tyer {
3392                            let tdrc_key = intern_tag_key(b"TDRC");
3393                            if pyo3::ffi::PyDict_Contains(dict_ptr, tdrc_key) != 0 {
3394                                pyo3::ffi::Py_DECREF(tdrc_key);
3395                                pyo3::ffi::Py_DECREF(final_ptr);
3396                                continue;
3397                            }
3398                            tdrc_key
3399                        } else {
3400                            intern_tag_key(id_bytes)
3401                        };
3402                        let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
3403                        if existing.is_null() {
3404                            pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, final_ptr);
3405                            key_ptrs.push(key_ptr);
3406                        } else {
3407                            if pyo3::ffi::PyList_Check(existing) != 0 {
3408                                pyo3::ffi::PyList_Append(existing, final_ptr);
3409                            } else {
3410                                let list = pyo3::ffi::PyList_New(2);
3411                                pyo3::ffi::Py_INCREF(existing);
3412                                pyo3::ffi::PyList_SET_ITEM(list, 0, existing);
3413                                pyo3::ffi::Py_INCREF(final_ptr);
3414                                pyo3::ffi::PyList_SET_ITEM(list, 1, final_ptr);
3415                                pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list);
3416                                pyo3::ffi::Py_DECREF(list);
3417                            }
3418                            pyo3::ffi::Py_DECREF(key_ptr);
3419                        }
3420                        pyo3::ffi::Py_DECREF(final_ptr);
3421                        continue;
3422                    }
3423                }
3424            }
3425
3426            // URL frames: raw Latin-1, no encoding byte
3427            if id_bytes[0] == b'W' && id_str != "WXXX" {
3428                let mut flen = frame_data.len();
3429                while flen > 0 && frame_data[flen-1] == 0 { flen -= 1; }
3430                if flen > 0 && frame_data[..flen].iter().all(|&b| b < 128) {
3431                    unsafe {
3432                        let py_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
3433                            frame_data.as_ptr() as *const std::ffi::c_char, flen as pyo3::ffi::Py_ssize_t);
3434                        if !py_ptr.is_null() {
3435                            let key_ptr = intern_tag_key(id_bytes);
3436                            if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
3437                                pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_ptr);
3438                                key_ptrs.push(key_ptr);
3439                            } else {
3440                                pyo3::ffi::Py_DECREF(key_ptr);
3441                            }
3442                            pyo3::ffi::Py_DECREF(py_ptr);
3443                            continue;
3444                        }
3445                    }
3446                }
3447            }
3448
3449            // Full decode fallback
3450            if let Ok(frame) = id3::frames::parse_frame(id_str, frame_data) {
3451                emit_frame_to_dict(py, &frame, id_str, dict_ptr, key_ptrs);
3452            }
3453        } else {
3454            // Frame with flags: need data mutations
3455            let mut frame_data = tag_bytes[*offset..*offset+size].to_vec();
3456            *offset += size;
3457            if encrypted { continue; }
3458            if has_data_length && frame_data.len() >= 4 {
3459                frame_data = frame_data[4..].to_vec();
3460            }
3461            if unsynchronised {
3462                frame_data = match id3::unsynch::decode(&frame_data) {
3463                    Ok(d) => d,
3464                    Err(_) => continue,
3465                };
3466            }
3467            if compressed {
3468                frame_data = match id3::tags::decompress_zlib(&frame_data) {
3469                    Ok(d) => d,
3470                    Err(_) => continue,
3471                };
3472            }
3473
3474            if let Ok(frame) = id3::frames::parse_frame(id_str, &frame_data) {
3475                emit_frame_to_dict(py, &frame, id_str, dict_ptr, key_ptrs);
3476            }
3477        }
3478    }
3479}
3480
3481/// Emit a parsed ID3 frame to dict with TCON resolution, TYER→TDRC, and merge support.
3482fn emit_frame_to_dict(
3483    py: Python<'_>, frame: &id3::frames::Frame, id_str: &str,
3484    dict_ptr: *mut pyo3::ffi::PyObject, key_ptrs: &mut Vec<*mut pyo3::ffi::PyObject>,
3485) {
3486    // TCON genre resolution
3487    let frame_ref;
3488    let resolved_frame;
3489    let actual_frame = if id_str == "TCON" {
3490        if let id3::frames::Frame::Text(tf) = frame {
3491            let resolved_text: Vec<String> = tf.text.iter()
3492                .map(|t| resolve_tcon_genre(t))
3493                .collect();
3494            resolved_frame = id3::frames::Frame::Text(id3::frames::TextFrame {
3495                id: tf.id.clone(),
3496                encoding: tf.encoding,
3497                text: resolved_text,
3498            });
3499            frame_ref = &resolved_frame;
3500            frame_ref
3501        } else { frame }
3502    } else { frame };
3503
3504    let key = actual_frame.hash_key();
3505    let py_val = frame_to_py(py, actual_frame);
3506    unsafe {
3507        // TYER normalization: store as TDRC only if TDRC not already present
3508        let key_ptr = if id_str == "TYER" {
3509            let tdrc_key = intern_tag_key(b"TDRC");
3510            if pyo3::ffi::PyDict_Contains(dict_ptr, tdrc_key) != 0 {
3511                pyo3::ffi::Py_DECREF(tdrc_key);
3512                return; // TDRC already exists, skip TYER
3513            }
3514            tdrc_key
3515        } else {
3516            intern_tag_key(key.as_str().as_bytes())
3517        };
3518        let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
3519        if existing.is_null() {
3520            pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
3521            key_ptrs.push(key_ptr);
3522        } else {
3523            // Text frame merge: append values
3524            if let id3::frames::Frame::Text(_) = actual_frame {
3525                if pyo3::ffi::PyList_Check(existing) != 0 {
3526                    if pyo3::ffi::PyList_Check(py_val.as_ptr()) != 0 {
3527                        let n = pyo3::ffi::PyList_Size(py_val.as_ptr());
3528                        for i in 0..n {
3529                            let item = pyo3::ffi::PyList_GetItem(py_val.as_ptr(), i);
3530                            pyo3::ffi::PyList_Append(existing, item);
3531                        }
3532                    } else {
3533                        pyo3::ffi::PyList_Append(existing, py_val.as_ptr());
3534                    }
3535                } else {
3536                    // Convert existing string + new to list
3537                    let list = pyo3::ffi::PyList_New(0);
3538                    pyo3::ffi::PyList_Append(list, existing);
3539                    if pyo3::ffi::PyList_Check(py_val.as_ptr()) != 0 {
3540                        let n = pyo3::ffi::PyList_Size(py_val.as_ptr());
3541                        for i in 0..n {
3542                            let item = pyo3::ffi::PyList_GetItem(py_val.as_ptr(), i);
3543                            pyo3::ffi::PyList_Append(list, item);
3544                        }
3545                    } else {
3546                        pyo3::ffi::PyList_Append(list, py_val.as_ptr());
3547                    }
3548                    pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list);
3549                    pyo3::ffi::Py_DECREF(list);
3550                }
3551            }
3552            pyo3::ffi::Py_DECREF(key_ptr);
3553        }
3554    }
3555}
3556
3557/// Single-pass VC parsing directly to PyDict — no intermediate Vec allocation.
3558/// For each VC entry: create Python key+value, set in dict. Duplicate keys get list append.
3559#[inline(always)]
3560fn parse_vc_to_dict_direct<'py>(
3561    _py: Python<'py>,
3562    data: &[u8],
3563    dict: &Bound<'py, PyDict>,
3564    keys_out: &mut Vec<*mut pyo3::ffi::PyObject>,
3565) -> PyResult<()> {
3566    if data.len() < 8 { return Ok(()); }
3567    let mut pos = 0;
3568    let vendor_len = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
3569    pos += 4;
3570    if pos + vendor_len > data.len() { return Ok(()); }
3571    pos += vendor_len;
3572    if pos + 4 > data.len() { return Ok(()); }
3573    let count = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
3574    pos += 4;
3575
3576    let dict_ptr = dict.as_ptr();
3577
3578    for _ in 0..count {
3579        if pos + 4 > data.len() { break; }
3580        let clen = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
3581        pos += 4;
3582        if pos + clen > data.len() { break; }
3583        let raw = &data[pos..pos + clen];
3584        pos += clen;
3585
3586        let eq_pos = match memchr::memchr(b'=', raw) {
3587            Some(p) => p,
3588            None => continue,
3589        };
3590        let key_bytes = &raw[..eq_pos];
3591        let value_bytes = &raw[eq_pos + 1..];
3592
3593        unsafe {
3594            // Always lowercase key into stack buffer (matches mutagen behavior)
3595            let mut buf = [0u8; 128];
3596            let key_len = key_bytes.len().min(128);
3597            for i in 0..key_len { buf[i] = key_bytes[i].to_ascii_lowercase(); }
3598
3599            let key_ptr = intern_tag_key(&buf[..key_len]);
3600            if key_ptr.is_null() { pyo3::ffi::PyErr_Clear(); continue; }
3601
3602            // Create value PyUnicode directly from raw bytes (CPython validates UTF-8)
3603            let val_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
3604                value_bytes.as_ptr() as *const std::ffi::c_char,
3605                value_bytes.len() as pyo3::ffi::Py_ssize_t);
3606            if val_ptr.is_null() {
3607                pyo3::ffi::PyErr_Clear();
3608                pyo3::ffi::Py_DECREF(key_ptr);
3609                continue;
3610            }
3611
3612            // Single hash lookup: PyDict_GetItem returns borrowed ref or NULL
3613            let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
3614            if !existing.is_null() {
3615                if pyo3::ffi::PyList_Check(existing) != 0 {
3616                    // Already a list from prior duplicate: append
3617                    pyo3::ffi::PyList_Append(existing, val_ptr);
3618                    pyo3::ffi::Py_DECREF(val_ptr); // Append INCREFs internally
3619                } else {
3620                    // First duplicate: create [existing_val, new_val]
3621                    let list_ptr = pyo3::ffi::PyList_New(2);
3622                    pyo3::ffi::Py_INCREF(existing); // SET_ITEM steals ref
3623                    pyo3::ffi::PyList_SET_ITEM(list_ptr, 0, existing);
3624                    pyo3::ffi::PyList_SET_ITEM(list_ptr, 1, val_ptr); // steals ref, don't DECREF
3625                    pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list_ptr);
3626                    pyo3::ffi::Py_DECREF(list_ptr);
3627                }
3628                pyo3::ffi::Py_DECREF(key_ptr);
3629            } else {
3630                // New key: store value directly (no list wrapper for speed)
3631                pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, val_ptr);
3632                pyo3::ffi::Py_DECREF(val_ptr);
3633                keys_out.push(key_ptr);
3634            }
3635        }
3636    }
3637    Ok(())
3638}
3639
3640/// Direct FLAC → PyDict (bypasses PreSerializedFile).
3641/// Uses single-pass VC parsing directly to dict.
3642#[inline(always)]
3643fn fast_read_flac_direct<'py>(py: Python<'py>, data: &[u8], file_size: usize, dict: &Bound<'py, PyDict>) -> PyResult<bool> {
3644    let flac_offset = if data.len() >= 4 && &data[0..4] == b"fLaC" {
3645        0
3646    } else if data.len() >= 10 && &data[0..3] == b"ID3" {
3647        let size = crate::id3::header::BitPaddedInt::syncsafe(&data[6..10]) as usize;
3648        let off = 10 + size;
3649        if off + 4 > data.len() || &data[off..off+4] != b"fLaC" { return Ok(false); }
3650        off
3651    } else {
3652        return Ok(false);
3653    };
3654
3655    let mut pos = flac_offset + 4;
3656    let mut streaminfo: Option<flac::StreamInfo> = None;
3657    let mut vc_data: Option<&[u8]> = None;
3658    let mut picture_blocks: Vec<(usize, usize)> = Vec::new();
3659
3660    loop {
3661        if pos + 4 > data.len() { break; }
3662        let header = data[pos];
3663        let is_last = header & 0x80 != 0;
3664        let bt = header & 0x7F;
3665        let block_size = ((data[pos+1] as usize) << 16) | ((data[pos+2] as usize) << 8) | (data[pos+3] as usize);
3666        pos += 4;
3667        if pos + block_size > data.len() { break; }
3668
3669        match bt {
3670            0 => {
3671                if let Ok(si) = flac::StreamInfo::parse(&data[pos..pos+block_size]) {
3672                    streaminfo = Some(si);
3673                }
3674            }
3675            4 => {
3676                let vc_size = flac::compute_vc_data_size(&data[pos..]).unwrap_or(block_size);
3677                let end = pos.saturating_add(vc_size).min(data.len());
3678                vc_data = Some(&data[pos..end]);
3679            }
3680            6 => {
3681                picture_blocks.push((pos, block_size));
3682            }
3683            _ => {}
3684        }
3685
3686        pos += block_size;
3687        if is_last { break; }
3688    }
3689
3690    // pos now points to the start of audio frames (after all metadata blocks)
3691    let si = match streaminfo {
3692        Some(si) => si,
3693        None => return Ok(false),
3694    };
3695    // Bitrate: use audio data size only (exclude metadata), matching mutagen behavior
3696    let audio_data_size = file_size.saturating_sub(pos);
3697    let bitrate = if si.length > 0.0 {
3698        (audio_data_size as f64 * 8.0 / si.length) as u32
3699    } else { 0 };
3700    let dict_ptr = dict.as_ptr();
3701    unsafe {
3702        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), si.length);
3703        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), si.sample_rate);
3704        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), si.channels as u32);
3705        set_dict_u32(dict_ptr, pyo3::intern!(py, "bits_per_sample").as_ptr(), si.bits_per_sample as u32);
3706        set_dict_i64(dict_ptr, pyo3::intern!(py, "total_samples").as_ptr(), si.total_samples as i64);
3707        set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), bitrate);
3708    }
3709
3710    let mut keys_out: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
3711    if let Some(vc) = vc_data {
3712        parse_vc_to_dict_direct(py, vc, dict, &mut keys_out)?;
3713    }
3714
3715    // Add pictures to dict as _pictures list
3716    if !picture_blocks.is_empty() {
3717        let pics = PyList::empty(py);
3718        for (pic_pos, pic_size) in &picture_blocks {
3719            if let Ok(pic) = flac::FLACPicture::parse(&data[*pic_pos..*pic_pos + *pic_size]) {
3720                let d = PyDict::new(py);
3721                let _ = d.set_item("type", pic.pic_type);
3722                let _ = d.set_item("mime", &pic.mime);
3723                let _ = d.set_item("desc", &pic.desc);
3724                let _ = d.set_item("width", pic.width);
3725                let _ = d.set_item("height", pic.height);
3726                let _ = d.set_item("depth", pic.depth);
3727                let _ = d.set_item("colors", pic.colors);
3728                let _ = d.set_item("data", pyo3::types::PyBytes::new(py, &pic.data));
3729                let _ = pics.append(d);
3730            }
3731        }
3732        let _ = dict.set_item(pyo3::intern!(py, "_pictures"), pics);
3733    }
3734
3735    set_keys_list(py, dict, keys_out)?;
3736    unsafe {
3737        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"flac\0".as_ptr() as *const std::ffi::c_char);
3738        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
3739        pyo3::ffi::Py_DECREF(fmt);
3740    }
3741    Ok(true)
3742}
3743
3744/// Direct OGG → PyDict (bypasses PreSerializedFile).
3745#[inline(always)]
3746fn fast_read_ogg_direct<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
3747    if data.len() < 58 || &data[0..4] != b"OggS" { return Ok(false); }
3748
3749    let serial = u32::from_le_bytes([data[14], data[15], data[16], data[17]]);
3750    let num_seg = data[26] as usize;
3751    let seg_table_end = 27 + num_seg;
3752    if seg_table_end > data.len() { return Ok(false); }
3753
3754    let page_data_size: usize = data[27..seg_table_end].iter().map(|&s| s as usize).sum();
3755    let first_page_end = seg_table_end + page_data_size;
3756
3757    if seg_table_end + 30 > data.len() { return Ok(false); }
3758    let id_data = &data[seg_table_end..];
3759    if id_data.len() < 30 || &id_data[0..7] != b"\x01vorbis" { return Ok(false); }
3760
3761    let channels = id_data[11];
3762    let sample_rate = u32::from_le_bytes([id_data[12], id_data[13], id_data[14], id_data[15]]);
3763    let nominal_bitrate = u32::from_le_bytes([id_data[20], id_data[21], id_data[22], id_data[23]]);
3764
3765    if first_page_end + 27 > data.len() { return Ok(false); }
3766    if &data[first_page_end..first_page_end+4] != b"OggS" { return Ok(false); }
3767
3768    // Try fast single-page path first, fall back to multi-page assembly
3769    let seg2_count = data[first_page_end + 26] as usize;
3770    let seg2_table_start = first_page_end + 27;
3771    let seg2_table_end = seg2_table_start + seg2_count;
3772    if seg2_table_end > data.len() { return Ok(false); }
3773
3774    let seg2_table = &data[seg2_table_start..seg2_table_end];
3775    let mut first_packet_size = 0usize;
3776    let mut single_page = false;
3777    for &seg in seg2_table {
3778        first_packet_size += seg as usize;
3779        if seg < 255 { single_page = true; break; }
3780    }
3781
3782    let length = ogg::find_last_granule(data, serial)
3783        .map(|g| if g > 0 && sample_rate > 0 { g as f64 / sample_rate as f64 } else { 0.0 })
3784        .unwrap_or(0.0);
3785
3786    let bitrate = if nominal_bitrate > 0 {
3787        nominal_bitrate
3788    } else if length > 0.0 {
3789        (data.len() as f64 * 8.0 / length) as u32
3790    } else { 0 };
3791
3792    let dict_ptr_ogg = dict.as_ptr();
3793    unsafe {
3794        set_dict_f64(dict_ptr_ogg, pyo3::intern!(py, "length").as_ptr(), length);
3795        set_dict_u32(dict_ptr_ogg, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
3796        set_dict_u32(dict_ptr_ogg, pyo3::intern!(py, "channels").as_ptr(), channels as u32);
3797        set_dict_u32(dict_ptr_ogg, pyo3::intern!(py, "bitrate").as_ptr(), bitrate);
3798    }
3799
3800    let mut keys_out: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
3801    if single_page {
3802        // Fast path: packet fits in one page, zero-copy VC parse
3803        let comment_start = seg2_table_end;
3804        if comment_start + first_packet_size > data.len() { return Ok(false); }
3805        if first_packet_size < 7 { return Ok(false); }
3806        if &data[comment_start..comment_start+7] != b"\x03vorbis" { return Ok(false); }
3807        let vc_data = &data[comment_start + 7..comment_start + first_packet_size];
3808        parse_vc_to_dict_direct(py, vc_data, dict, &mut keys_out)?;
3809    } else {
3810        // Slow path: multi-page assembly
3811        let comment_packet = match ogg::ogg_assemble_first_packet(data, first_page_end) {
3812            Some(p) => p,
3813            None => return Ok(false),
3814        };
3815        if comment_packet.len() < 7 { return Ok(false); }
3816        if &comment_packet[0..7] != b"\x03vorbis" { return Ok(false); }
3817        parse_vc_to_dict_direct(py, &comment_packet[7..], dict, &mut keys_out)?;
3818    }
3819    set_keys_list(py, dict, keys_out)?;
3820    unsafe {
3821        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"ogg\0".as_ptr() as *const std::ffi::c_char);
3822        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
3823        pyo3::ffi::Py_DECREF(fmt);
3824    }
3825    Ok(true)
3826}
3827
3828/// Direct MP3 → PyDict: inline ID3 frame walking with zero-alloc text frame decoding.
3829/// Eliminates raw_buf copy, LazyFrame allocation, and Rust String allocation for text frames.
3830#[inline(always)]
3831fn fast_read_mp3_direct<'py>(py: Python<'py>, data: &[u8], _path: &str, dict: &Bound<'py, PyDict>) -> PyResult<bool> {
3832    let file_size = data.len() as u64;
3833
3834    // 1. Parse ID3v2 header (10 bytes only)
3835    let (id3_header, audio_start) = if data.len() >= 10 {
3836        match id3::header::ID3Header::parse(&data[0..10], 0) {
3837            Ok(h) => {
3838                let tag_size = h.size as usize;
3839                if 10 + tag_size <= data.len() {
3840                    let astart = h.full_size() as usize;
3841                    (Some(h), astart)
3842                } else { (None, 0) }
3843            }
3844            Err(_) => (None, 0),
3845        }
3846    } else { (None, 0) };
3847
3848    // 2. Parse MPEG audio info
3849    let audio_end = data.len().min(audio_start + 8192);
3850    let audio_data = if audio_start < data.len() { &data[audio_start..audio_end] } else { &[] };
3851    let info = match mp3::MPEGInfo::parse(audio_data, 0, file_size.saturating_sub(audio_start as u64)) {
3852        Ok(i) => i,
3853        Err(_) => return Ok(false),
3854    };
3855
3856    // 3. Set info fields using raw FFI
3857    let dict_ptr = dict.as_ptr();
3858    unsafe {
3859        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), info.length);
3860        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), info.sample_rate);
3861        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), info.channels);
3862        set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), info.bitrate);
3863        set_dict_f64(dict_ptr, pyo3::intern!(py, "version").as_ptr(), info.version);
3864        set_dict_i64(dict_ptr, pyo3::intern!(py, "layer").as_ptr(), info.layer as i64);
3865        set_dict_i64(dict_ptr, pyo3::intern!(py, "mode").as_ptr(), info.mode as i64);
3866        set_dict_bool(dict_ptr, pyo3::intern!(py, "protected").as_ptr(), info.protected);
3867        set_dict_i64(dict_ptr, pyo3::intern!(py, "bitrate_mode").as_ptr(), match info.bitrate_mode {
3868            mp3::xing::BitrateMode::Unknown => 0,
3869            mp3::xing::BitrateMode::CBR => 1,
3870            mp3::xing::BitrateMode::VBR => 2,
3871            mp3::xing::BitrateMode::ABR => 3,
3872        });
3873    }
3874
3875    // 4. Walk ID3v2 frames directly (no LazyFrame/ID3Tags intermediary)
3876    let mut key_ptrs: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
3877
3878    if let Some(ref h) = id3_header {
3879        let tag_size = h.size as usize;
3880        let version = h.version.0;
3881
3882        // Handle whole-tag unsynchronisation (v2.3 and below)
3883        let decoded_buf;
3884        let tag_bytes: &[u8] = if h.flags.unsynchronisation && version < 4 {
3885            decoded_buf = id3::unsynch::decode(&data[10..10 + tag_size]).unwrap_or_default();
3886            &decoded_buf[..]
3887        } else {
3888            &data[10..10 + tag_size]
3889        };
3890
3891        let mut offset = 0usize;
3892
3893        // Skip extended header
3894        if h.flags.extended && version >= 3 && tag_bytes.len() >= 4 {
3895            let ext_size = if version == 4 {
3896                id3::header::BitPaddedInt::syncsafe(&tag_bytes[0..4]) as usize
3897            } else {
3898                u32::from_be_bytes([tag_bytes[0], tag_bytes[1], tag_bytes[2], tag_bytes[3]]) as usize
3899            };
3900            offset = if version == 4 { ext_size } else { ext_size + 4 };
3901        }
3902
3903        let bpi = if version == 4 {
3904            id3::header::determine_bpi(&tag_bytes[offset..], tag_bytes.len())
3905        } else { 8 };
3906
3907        if version == 2 {
3908            fast_walk_v22_frames(py, tag_bytes, &mut offset, dict_ptr, &mut key_ptrs);
3909        } else {
3910            fast_walk_v2x_frames(py, tag_bytes, &mut offset, version, bpi, dict_ptr, &mut key_ptrs);
3911        }
3912    }
3913
3914    // 5. Check for ID3v1 at file end
3915    if data.len() >= 128 {
3916        let v1_data = &data[data.len() - 128..];
3917        if v1_data.len() >= 3 && &v1_data[0..3] == b"TAG" {
3918            if let Ok(v1_frames) = id3::id3v1::parse_id3v1(v1_data) {
3919                for frame in v1_frames {
3920                    let key = frame.hash_key();
3921                    let key_str = key.as_str();
3922                    unsafe {
3923                        let key_ptr = intern_tag_key(key_str.as_bytes());
3924                        if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
3925                            let py_val = frame_to_py(py, &frame);
3926                            pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
3927                            key_ptrs.push(key_ptr);
3928                        } else {
3929                            pyo3::ffi::Py_DECREF(key_ptr);
3930                        }
3931                    }
3932                }
3933            }
3934        }
3935    }
3936
3937    set_keys_list(py, dict, key_ptrs)?;
3938    unsafe {
3939        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"mp3\0".as_ptr() as *const std::ffi::c_char);
3940        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
3941        pyo3::ffi::Py_DECREF(fmt);
3942        // _has_tags: true only if an ID3 header was found
3943        let has_tags = if id3_header.is_some() { pyo3::ffi::Py_True() } else { pyo3::ffi::Py_False() };
3944        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_has_tags").as_ptr(), has_tags);
3945    }
3946    Ok(true)
3947}
3948
3949/// Direct MP4 → PyDict: inline atom walking, zero Rust String allocation.
3950/// Converts atom data directly to Python objects, skipping MP4File/MP4Tags intermediary.
3951#[inline(always)]
3952fn fast_read_mp4_direct<'py>(py: Python<'py>, data: &[u8], _path: &str, dict: &Bound<'py, PyDict>) -> PyResult<bool> {
3953    use mp4::atom::AtomIter;
3954
3955    // 1. Find moov atom
3956    let moov = match AtomIter::new(data, 0, data.len()).find_name(b"moov") {
3957        Some(a) => a,
3958        None => return Ok(false),
3959    };
3960    let moov_s = moov.data_offset;
3961    let moov_e = moov_s + moov.data_size;
3962
3963    // 2. Parse mvhd for duration
3964    let mut duration = 0u64;
3965    let mut timescale = 1000u32;
3966    if let Some(mvhd) = AtomIter::new(data, moov_s, moov_e).find_name(b"mvhd") {
3967        let d = &data[mvhd.data_offset..mvhd.data_offset + mvhd.data_size.min(32)];
3968        if !d.is_empty() {
3969            let version = d[0];
3970            if version == 0 && d.len() >= 20 {
3971                timescale = u32::from_be_bytes([d[12], d[13], d[14], d[15]]);
3972                duration = u32::from_be_bytes([d[16], d[17], d[18], d[19]]) as u64;
3973            } else if version == 1 && d.len() >= 32 {
3974                timescale = u32::from_be_bytes([d[20], d[21], d[22], d[23]]);
3975                duration = u64::from_be_bytes([d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]]);
3976            }
3977        }
3978    }
3979    let length = if timescale > 0 { duration as f64 / timescale as f64 } else { 0.0 };
3980
3981    // 3. Find audio track for codec/channels/sample_rate
3982    let mut channels = 2u32;
3983    let mut sample_rate = 44100u32;
3984    let mut bits_per_sample = 16u32;
3985    let mut codec_bytes: [u8; 4] = *b"mp4a";
3986    let mut esds_bitrate = 0u32;
3987
3988    'trak_loop: for trak in AtomIter::new(data, moov_s, moov_e) {
3989        if trak.name != *b"trak" { continue; }
3990        let trak_s = trak.data_offset;
3991        let trak_e = trak_s + trak.data_size;
3992        let mdia = match AtomIter::new(data, trak_s, trak_e).find_name(b"mdia") {
3993            Some(a) => a, None => continue,
3994        };
3995        let mdia_s = mdia.data_offset;
3996        let mdia_e = mdia_s + mdia.data_size;
3997        // Check for sound handler
3998        let is_audio = AtomIter::new(data, mdia_s, mdia_e).any(|a| {
3999            if a.name == *b"hdlr" {
4000                let d = &data[a.data_offset..a.data_offset + a.data_size.min(12)];
4001                d.len() >= 12 && &d[8..12] == b"soun"
4002            } else { false }
4003        });
4004        if !is_audio { continue; }
4005        let minf = match AtomIter::new(data, mdia_s, mdia_e).find_name(b"minf") {
4006            Some(a) => a, None => continue,
4007        };
4008        let stbl = match AtomIter::new(data, minf.data_offset, minf.data_offset + minf.data_size).find_name(b"stbl") {
4009            Some(a) => a, None => continue,
4010        };
4011        let stsd = match AtomIter::new(data, stbl.data_offset, stbl.data_offset + stbl.data_size).find_name(b"stsd") {
4012            Some(a) => a, None => continue,
4013        };
4014        let stsd_data = &data[stsd.data_offset..stsd.data_offset + stsd.data_size];
4015        if stsd_data.len() >= 16 {
4016            let entry_data = &stsd_data[8..];
4017            if entry_data.len() >= 36 {
4018                let entry_size = u32::from_be_bytes([entry_data[0], entry_data[1], entry_data[2], entry_data[3]]) as usize;
4019                codec_bytes.copy_from_slice(&entry_data[4..8]);
4020                let audio_entry = &entry_data[8..];
4021                if audio_entry.len() >= 20 {
4022                    channels = u16::from_be_bytes([audio_entry[16], audio_entry[17]]) as u32;
4023                    bits_per_sample = u16::from_be_bytes([audio_entry[18], audio_entry[19]]) as u32;
4024                    if audio_entry.len() >= 28 {
4025                        sample_rate = u16::from_be_bytes([audio_entry[24], audio_entry[25]]) as u32;
4026                    }
4027                }
4028                // Look for esds sub-atom for accurate bitrate
4029                if entry_size > 36 && audio_entry.len() >= entry_size - 8 {
4030                    let sub_start = stsd.data_offset + 8 + 8 + 28;
4031                    let sub_end = stsd.data_offset + 8 + entry_size;
4032                    if sub_end <= data.len() {
4033                        for sub in AtomIter::new(data, sub_start, sub_end) {
4034                            if sub.name == *b"esds" {
4035                                let esds = &data[sub.data_offset..sub.data_offset + sub.data_size];
4036                                esds_bitrate = mp4::parse_esds_bitrate(esds);
4037                                break;
4038                            }
4039                        }
4040                    }
4041                }
4042            }
4043        }
4044        break 'trak_loop;
4045    }
4046
4047    let bitrate = if esds_bitrate > 0 {
4048        esds_bitrate
4049    } else if length > 0.0 {
4050        (data.len() as f64 * 8.0 / length) as u32
4051    } else { 0 };
4052
4053    // 4. Set info fields via raw FFI (no Rust String for codec)
4054    let dict_ptr = dict.as_ptr();
4055    unsafe {
4056        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), length);
4057        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
4058        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), channels);
4059        set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), bitrate);
4060        set_dict_u32(dict_ptr, pyo3::intern!(py, "bits_per_sample").as_ptr(), bits_per_sample);
4061        // Codec: create Python string directly from 4 bytes (no Rust String)
4062        let codec_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
4063            codec_bytes.as_ptr() as *const std::ffi::c_char, 4);
4064        pyo3::ffi::PyDict_SetItem(dict_ptr, pyo3::intern!(py, "codec").as_ptr(), codec_ptr);
4065        pyo3::ffi::Py_DECREF(codec_ptr);
4066    }
4067
4068    // 5. Walk ilst and convert tags directly to Python (no MP4Tags intermediate)
4069    let mut key_ptrs: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
4070
4071    if let Some(udta) = AtomIter::new(data, moov_s, moov_e).find_name(b"udta") {
4072        if let Some(meta) = AtomIter::new(data, udta.data_offset, udta.data_offset + udta.data_size).find_name(b"meta") {
4073            let meta_off = meta.data_offset + 4;
4074            let meta_end = meta.data_offset + meta.data_size;
4075            if meta_off < meta_end {
4076                if let Some(ilst) = AtomIter::new(data, meta_off, meta_end).find_name(b"ilst") {
4077                    for item in AtomIter::new(data, ilst.data_offset, ilst.data_offset + ilst.data_size) {
4078                        // For freeform atoms (----), build key from mean+name sub-atoms
4079                        let key_ptr = if item.name == *b"----" {
4080                            let freeform_key = mp4::build_freeform_key(data, item.data_offset, item.data_offset + item.data_size);
4081                            unsafe {
4082                                let bytes = freeform_key.as_bytes();
4083                                pyo3::ffi::PyUnicode_FromStringAndSize(
4084                                    bytes.as_ptr() as *const std::ffi::c_char,
4085                                    bytes.len() as pyo3::ffi::Py_ssize_t)
4086                            }
4087                        } else {
4088                            unsafe { mp4_atom_name_to_py_key(&item.name) }
4089                        };
4090                        if key_ptr.is_null() { continue; }
4091
4092                        // Find first "data" atom and convert value directly to Python
4093                        for da in AtomIter::new(data, item.data_offset, item.data_offset + item.data_size) {
4094                            if da.name != *b"data" { continue; }
4095                            let ad = &data[da.data_offset..da.data_offset + da.data_size];
4096                            if ad.len() < 8 { continue; }
4097                            let type_ind = u32::from_be_bytes([ad[0], ad[1], ad[2], ad[3]]);
4098                            let vd = &ad[8..];
4099
4100                            let py_val = unsafe { mp4_data_to_py_raw(py, &item.name, type_ind, vd) };
4101                            if !py_val.is_null() {
4102                                unsafe {
4103                                    if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
4104                                        pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val);
4105                                        key_ptrs.push(key_ptr);
4106                                    } else {
4107                                        pyo3::ffi::Py_DECREF(key_ptr);
4108                                    }
4109                                    pyo3::ffi::Py_DECREF(py_val);
4110                                }
4111                            } else {
4112                                unsafe { pyo3::ffi::Py_DECREF(key_ptr); }
4113                            }
4114                            break; // Only first data atom per item
4115                        }
4116                    }
4117                }
4118            }
4119        }
4120    }
4121
4122    set_keys_list(py, dict, key_ptrs)?;
4123    unsafe {
4124        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"mp4\0".as_ptr() as *const std::ffi::c_char);
4125        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
4126        pyo3::ffi::Py_DECREF(fmt);
4127    }
4128    Ok(true)
4129}
4130
4131/// Convert MP4 atom name to Python string key. Handles 0xa9 prefix → ©.
4132/// Returns new reference (caller must DECREF if not stored).
4133#[inline(always)]
4134unsafe fn mp4_atom_name_to_py_key(name: &[u8; 4]) -> *mut pyo3::ffi::PyObject {
4135    if name[0] == 0xa9 {
4136        // © prefix: create "©" + 3 remaining bytes
4137        let mut buf = [0u8; 5]; // © is 2 bytes in UTF-8 + 3 ASCII = 5
4138        buf[0] = 0xc2; // UTF-8 for ©
4139        buf[1] = 0xa9;
4140        buf[2] = name[1];
4141        buf[3] = name[2];
4142        buf[4] = name[3];
4143        pyo3::ffi::PyUnicode_FromStringAndSize(buf.as_ptr() as *const std::ffi::c_char, 5)
4144    } else {
4145        pyo3::ffi::PyUnicode_FromStringAndSize(name.as_ptr() as *const std::ffi::c_char, 4)
4146    }
4147}
4148
4149/// Convert MP4 data atom value directly to Python object (no Rust allocation).
4150/// Returns new reference or null on failure.
4151#[inline(always)]
4152unsafe fn mp4_data_to_py_raw(_py: Python<'_>, atom_name: &[u8; 4], type_ind: u32, vd: &[u8]) -> *mut pyo3::ffi::PyObject {
4153    match type_ind {
4154        1 => {
4155            // UTF-8 text → Python string directly
4156            pyo3::ffi::PyUnicode_FromStringAndSize(
4157                vd.as_ptr() as *const std::ffi::c_char, vd.len() as pyo3::ffi::Py_ssize_t)
4158        }
4159        21 => {
4160            // Signed integer
4161            let val: i64 = match vd.len() {
4162                1 => vd[0] as i8 as i64,
4163                2 => i16::from_be_bytes([vd[0], vd[1]]) as i64,
4164                4 => i32::from_be_bytes([vd[0], vd[1], vd[2], vd[3]]) as i64,
4165                8 => i64::from_be_bytes([vd[0], vd[1], vd[2], vd[3], vd[4], vd[5], vd[6], vd[7]]),
4166                _ => return std::ptr::null_mut(),
4167            };
4168            pyo3::ffi::PyLong_FromLongLong(val)
4169        }
4170        0 => {
4171            // Implicit type — depends on atom name
4172            if (atom_name == b"trkn" || atom_name == b"disk") && vd.len() >= 6 {
4173                let a = i16::from_be_bytes([vd[2], vd[3]]) as i64;
4174                let b = i16::from_be_bytes([vd[4], vd[5]]) as i64;
4175                let pa = pyo3::ffi::PyLong_FromLongLong(a);
4176                let pb = pyo3::ffi::PyLong_FromLongLong(b);
4177                let tup = pyo3::ffi::PyTuple_New(2);
4178                pyo3::ffi::PyTuple_SET_ITEM(tup, 0, pa);
4179                pyo3::ffi::PyTuple_SET_ITEM(tup, 1, pb);
4180                tup
4181            } else if atom_name == b"gnre" && vd.len() >= 2 {
4182                let genre_id = u16::from_be_bytes([vd[0], vd[1]]) as usize;
4183                if genre_id > 0 && genre_id <= crate::id3::specs::GENRES.len() {
4184                    let g = crate::id3::specs::GENRES[genre_id - 1];
4185                    pyo3::ffi::PyUnicode_FromStringAndSize(
4186                        g.as_ptr() as *const std::ffi::c_char, g.len() as pyo3::ffi::Py_ssize_t)
4187                } else {
4188                    std::ptr::null_mut()
4189                }
4190            } else {
4191                // Unknown implicit type: store as raw bytes (e.g., freeform atoms)
4192                pyo3::ffi::PyBytes_FromStringAndSize(
4193                    vd.as_ptr() as *const std::ffi::c_char, vd.len() as pyo3::ffi::Py_ssize_t)
4194            }
4195        }
4196        13 | 14 => {
4197            // JPEG or PNG cover art → Python bytes
4198            pyo3::ffi::PyBytes_FromStringAndSize(
4199                vd.as_ptr() as *const std::ffi::c_char, vd.len() as pyo3::ffi::Py_ssize_t)
4200        }
4201        _ => std::ptr::null_mut(),
4202    }
4203}
4204
4205// ---- Info-only parsers: parse audio metadata without creating tag Python objects ----
4206
4207/// FLAC info only: just StreamInfo, skip VorbisComment.
4208#[inline(always)]
4209fn fast_info_flac<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
4210    let flac_offset = if data.len() >= 4 && &data[0..4] == b"fLaC" {
4211        0
4212    } else if data.len() >= 10 && &data[0..3] == b"ID3" {
4213        let size = id3::header::BitPaddedInt::syncsafe(&data[6..10]) as usize;
4214        let off = 10 + size;
4215        if off + 4 > data.len() || &data[off..off+4] != b"fLaC" { return Ok(false); }
4216        off
4217    } else {
4218        return Ok(false);
4219    };
4220    let mut pos = flac_offset + 4;
4221    loop {
4222        if pos + 4 > data.len() { break; }
4223        let header = data[pos];
4224        let is_last = header & 0x80 != 0;
4225        let bt = header & 0x7F;
4226        let block_size = ((data[pos+1] as usize) << 16) | ((data[pos+2] as usize) << 8) | (data[pos+3] as usize);
4227        pos += 4;
4228        if pos + block_size > data.len() { break; }
4229        if bt == 0 {
4230            if let Ok(si) = flac::StreamInfo::parse(&data[pos..pos+block_size]) {
4231                let dict_ptr = dict.as_ptr();
4232                unsafe {
4233                    set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), si.length);
4234                    set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), si.sample_rate);
4235                    set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), si.channels as u32);
4236                    set_dict_u32(dict_ptr, pyo3::intern!(py, "bits_per_sample").as_ptr(), si.bits_per_sample as u32);
4237                    set_dict_i64(dict_ptr, pyo3::intern!(py, "total_samples").as_ptr(), si.total_samples as i64);
4238                }
4239                return Ok(true);
4240            }
4241        }
4242        pos += block_size;
4243        if is_last { break; }
4244    }
4245    Ok(false)
4246}
4247
4248/// OGG info only: parse identification header + last granule, skip VorbisComment.
4249#[inline(always)]
4250fn fast_info_ogg<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
4251    if data.len() < 58 || &data[0..4] != b"OggS" { return Ok(false); }
4252    let serial = u32::from_le_bytes([data[14], data[15], data[16], data[17]]);
4253    let num_seg = data[26] as usize;
4254    let seg_table_end = 27 + num_seg;
4255    if seg_table_end + 30 > data.len() { return Ok(false); }
4256    let id_data = &data[seg_table_end..];
4257    if id_data.len() < 30 || &id_data[0..7] != b"\x01vorbis" { return Ok(false); }
4258    let channels = id_data[11];
4259    let sample_rate = u32::from_le_bytes([id_data[12], id_data[13], id_data[14], id_data[15]]);
4260    let length = ogg::find_last_granule(data, serial)
4261        .map(|g| if g > 0 && sample_rate > 0 { g as f64 / sample_rate as f64 } else { 0.0 })
4262        .unwrap_or(0.0);
4263    let dict_ptr = dict.as_ptr();
4264    unsafe {
4265        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), length);
4266        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
4267        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), channels as u32);
4268    }
4269    Ok(true)
4270}
4271
4272/// MP3 info only: parse MPEG frame header, skip ID3 tags.
4273#[inline(always)]
4274fn fast_info_mp3<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
4275    let file_size = data.len() as u64;
4276    let audio_start = if data.len() >= 10 {
4277        match id3::header::ID3Header::parse(&data[0..10], 0) {
4278            Ok(h) => {
4279                let tag_size = h.size as usize;
4280                if 10 + tag_size <= data.len() { h.full_size() as usize } else { 0 }
4281            }
4282            Err(_) => 0,
4283        }
4284    } else { 0 };
4285    let audio_end = data.len().min(audio_start + 8192);
4286    let audio_data = if audio_start < data.len() { &data[audio_start..audio_end] } else { &[] };
4287    let info = match mp3::MPEGInfo::parse(audio_data, 0, file_size.saturating_sub(audio_start as u64)) {
4288        Ok(i) => i,
4289        Err(_) => return Ok(false),
4290    };
4291    let dict_ptr = dict.as_ptr();
4292    unsafe {
4293        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), info.length);
4294        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), info.sample_rate);
4295        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), info.channels);
4296        set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), info.bitrate);
4297    }
4298    Ok(true)
4299}
4300
4301/// MP4 info only: parse moov/mvhd + audio track, skip ilst tags.
4302#[inline(always)]
4303fn fast_info_mp4<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
4304    use mp4::atom::AtomIter;
4305    let moov = match AtomIter::new(data, 0, data.len()).find_name(b"moov") {
4306        Some(a) => a,
4307        None => return Ok(false),
4308    };
4309    let moov_s = moov.data_offset;
4310    let moov_e = moov_s + moov.data_size;
4311    let mut duration = 0u64;
4312    let mut timescale = 1000u32;
4313    if let Some(mvhd) = AtomIter::new(data, moov_s, moov_e).find_name(b"mvhd") {
4314        let d = &data[mvhd.data_offset..mvhd.data_offset + mvhd.data_size.min(32)];
4315        if !d.is_empty() {
4316            let version = d[0];
4317            if version == 0 && d.len() >= 20 {
4318                timescale = u32::from_be_bytes([d[12], d[13], d[14], d[15]]);
4319                duration = u32::from_be_bytes([d[16], d[17], d[18], d[19]]) as u64;
4320            } else if version == 1 && d.len() >= 32 {
4321                timescale = u32::from_be_bytes([d[20], d[21], d[22], d[23]]);
4322                duration = u64::from_be_bytes([d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]]);
4323            }
4324        }
4325    }
4326    let length = if timescale > 0 { duration as f64 / timescale as f64 } else { 0.0 };
4327    let mut channels = 2u32;
4328    let mut sample_rate = 44100u32;
4329    'trak: for trak in AtomIter::new(data, moov_s, moov_e) {
4330        if trak.name != *b"trak" { continue; }
4331        let ts = trak.data_offset;
4332        let te = ts + trak.data_size;
4333        let mdia = match AtomIter::new(data, ts, te).find_name(b"mdia") { Some(a) => a, None => continue };
4334        let ms = mdia.data_offset;
4335        let me = ms + mdia.data_size;
4336        let is_audio = AtomIter::new(data, ms, me).any(|a| {
4337            a.name == *b"hdlr" && {
4338                let d = &data[a.data_offset..a.data_offset + a.data_size.min(12)];
4339                d.len() >= 12 && &d[8..12] == b"soun"
4340            }
4341        });
4342        if !is_audio { continue; }
4343        let minf = match AtomIter::new(data, ms, me).find_name(b"minf") { Some(a) => a, None => continue };
4344        let stbl = match AtomIter::new(data, minf.data_offset, minf.data_offset + minf.data_size).find_name(b"stbl") { Some(a) => a, None => continue };
4345        let stsd = match AtomIter::new(data, stbl.data_offset, stbl.data_offset + stbl.data_size).find_name(b"stsd") { Some(a) => a, None => continue };
4346        let stsd_data = &data[stsd.data_offset..stsd.data_offset + stsd.data_size];
4347        if stsd_data.len() >= 16 {
4348            let entry = &stsd_data[8..];
4349            if entry.len() >= 36 {
4350                let audio = &entry[8..];
4351                if audio.len() >= 20 {
4352                    channels = u16::from_be_bytes([audio[16], audio[17]]) as u32;
4353                    if audio.len() >= 28 { sample_rate = u16::from_be_bytes([audio[24], audio[25]]) as u32; }
4354                }
4355            }
4356        }
4357        break 'trak;
4358    }
4359    let dict_ptr = dict.as_ptr();
4360    unsafe {
4361        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), length);
4362        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
4363        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), channels);
4364    }
4365    Ok(true)
4366}
4367
4368/// Fast info-only read: returns dict with audio info (no tags).
4369/// Selective parsing — skips tag structures entirely for maximum speed.
4370#[pyfunction]
4371fn _fast_info(py: Python<'_>, filename: &str) -> PyResult<Py<PyAny>> {
4372    let data = fast_file_read(filename)
4373        .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
4374    let dict: Bound<'_, PyDict> = unsafe {
4375        let ptr = pyo3::ffi::PyDict_New();
4376        if ptr.is_null() {
4377            return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
4378        }
4379        Bound::from_owned_ptr(py, ptr).cast_into_unchecked()
4380    };
4381    let ext = filename.rsplit('.').next().unwrap_or("");
4382    let ok = if ext.eq_ignore_ascii_case("flac") {
4383        fast_info_flac(py, &data, &dict)?
4384    } else if ext.eq_ignore_ascii_case("ogg") {
4385        fast_info_ogg(py, &data, &dict)?
4386    } else if ext.eq_ignore_ascii_case("mp3") {
4387        fast_info_mp3(py, &data, &dict)?
4388    } else if ext.eq_ignore_ascii_case("m4a") || ext.eq_ignore_ascii_case("m4b")
4389            || ext.eq_ignore_ascii_case("mp4") || ext.eq_ignore_ascii_case("m4v") {
4390        fast_info_mp4(py, &data, &dict)?
4391    } else {
4392        false
4393    };
4394    if !ok {
4395        return Err(PyValueError::new_err(format!("Unable to parse: {}", filename)));
4396    }
4397    Ok(dict.into_any().unbind())
4398}
4399
4400/// Fast single-file read with two-tier caching + direct parsing:
4401///   Level 1 (warm): RESULT_CACHE → PyDict_Copy (~200ns)
4402///   Level 2 (cold): TEMPLATE_CACHE → PyDict_Copy (~200ns, template persists across clear_cache)
4403///   First read: std::fs::read → fast_read_*_direct → PyDict (no intermediary)
4404/// clear_cache() only clears Level 1. Templates persist until file is modified.
4405#[pyfunction]
4406fn _fast_read(py: Python<'_>, filename: &str) -> PyResult<Py<PyAny>> {
4407    // Level 1: Check result cache (warm path)
4408    {
4409        let rcache = get_result_cache();
4410        let guard = rcache.read().unwrap();
4411        if let Some(cached) = guard.get(filename) {
4412            let copy = unsafe { pyo3::ffi::PyDict_Copy(cached.as_ptr()) };
4413            if !copy.is_null() {
4414                return Ok(unsafe { Bound::from_owned_ptr(py, copy).unbind() });
4415            }
4416        }
4417    }
4418
4419    // Level 2: Check template cache (cold path — template PyDict persists across clear_cache)
4420    {
4421        let tcache = get_template_cache();
4422        let guard = tcache.read().unwrap();
4423        if let Some(template) = guard.get(filename) {
4424            let copy = unsafe { pyo3::ffi::PyDict_Copy(template.as_ptr()) };
4425            if !copy.is_null() {
4426                let result = unsafe { Bound::from_owned_ptr(py, copy) };
4427                // Store in result cache for subsequent warm reads
4428                {
4429                    let dict_ref: Bound<'_, PyDict> = unsafe { result.clone().cast_into_unchecked() };
4430                    let rcache = get_result_cache();
4431                    let mut guard = rcache.write().unwrap();
4432                    guard.insert(filename.to_string(), dict_ref.unbind());
4433                }
4434                return Ok(result.unbind());
4435            }
4436        }
4437    }
4438
4439    // First read: raw libc I/O + direct parsing (no intermediary structures)
4440    let data = fast_file_read(filename)
4441        .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
4442
4443    let dict: Bound<'_, PyDict> = unsafe {
4444        let ptr = pyo3::ffi::PyDict_New();
4445        if ptr.is_null() {
4446            return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
4447        }
4448        Bound::from_owned_ptr(py, ptr).cast_into_unchecked()
4449    };
4450
4451    let ext = filename.rsplit('.').next().unwrap_or("");
4452    let ok = if ext.eq_ignore_ascii_case("flac") {
4453        fast_read_flac_direct(py, &data, data.len(), &dict)?
4454    } else if ext.eq_ignore_ascii_case("ogg") {
4455        fast_read_ogg_direct(py, &data, &dict)?
4456    } else if ext.eq_ignore_ascii_case("mp3") {
4457        fast_read_mp3_direct(py, &data, filename, &dict)?
4458    } else if ext.eq_ignore_ascii_case("m4a") || ext.eq_ignore_ascii_case("m4b")
4459            || ext.eq_ignore_ascii_case("mp4") || ext.eq_ignore_ascii_case("m4v") {
4460        fast_read_mp4_direct(py, &data, filename, &dict)?
4461    } else {
4462        // Unknown extension: try score-based detection
4463        let mp3_score = mp3::MP3File::score(filename, &data);
4464        let flac_score = flac::FLACFile::score(filename, &data);
4465        let ogg_score = ogg::OggVorbisFile::score(filename, &data);
4466        let mp4_score = mp4::MP4File::score(filename, &data);
4467        let max_score = mp3_score.max(flac_score).max(ogg_score).max(mp4_score);
4468        if max_score == 0 { false }
4469        else if max_score == flac_score { fast_read_flac_direct(py, &data, data.len(), &dict)? }
4470        else if max_score == ogg_score { fast_read_ogg_direct(py, &data, &dict)? }
4471        else if max_score == mp4_score { fast_read_mp4_direct(py, &data, filename, &dict)? }
4472        else { fast_read_mp3_direct(py, &data, filename, &dict)? }
4473    };
4474
4475    if !ok {
4476        return Err(PyValueError::new_err(format!("Unable to parse: {}", filename)));
4477    }
4478
4479    // Populate result + template caches (skip FILE_CACHE — populated lazily by read_cached)
4480    let key = filename.to_string();
4481    let dict_copy = dict.clone().unbind();
4482    {
4483        let tcache = get_template_cache();
4484        let mut guard = tcache.write().unwrap();
4485        guard.insert(key.clone(), dict_copy);
4486    }
4487    {
4488        let rcache = get_result_cache();
4489        let mut guard = rcache.write().unwrap();
4490        guard.insert(key, dict.clone().unbind());
4491    }
4492
4493    Ok(dict.into_any().unbind())
4494}
4495
4496/// Batch sequential read: processes all files in a single Rust call.
4497/// Eliminates per-file Python→Rust dispatch overhead.
4498/// Uses file cache for warm reads.
4499#[pyfunction]
4500fn _fast_read_seq(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
4501    unsafe {
4502        let result_ptr = pyo3::ffi::PyList_New(0);
4503        if result_ptr.is_null() {
4504            return Err(pyo3::exceptions::PyMemoryError::new_err("failed to create list"));
4505        }
4506
4507        for filename in &filenames {
4508            let data = match read_cached(filename) {
4509                Ok(d) => d,
4510                Err(_) => continue,
4511            };
4512
4513            let dict_ptr_raw = pyo3::ffi::PyDict_New();
4514            if dict_ptr_raw.is_null() { continue; }
4515            let dict: Bound<'_, PyDict> = Bound::from_owned_ptr(py, dict_ptr_raw).cast_into_unchecked();
4516            let ext = filename.rsplit('.').next().unwrap_or("");
4517
4518            let ok = if ext.eq_ignore_ascii_case("flac") {
4519                fast_read_flac_direct(py, &data, data.len(), &dict).unwrap_or(false)
4520            } else if ext.eq_ignore_ascii_case("ogg") {
4521                fast_read_ogg_direct(py, &data, &dict).unwrap_or(false)
4522            } else if ext.eq_ignore_ascii_case("mp3") {
4523                fast_read_mp3_direct(py, &data, filename, &dict).unwrap_or(false)
4524            } else if ext.eq_ignore_ascii_case("m4a") || ext.eq_ignore_ascii_case("m4b")
4525                    || ext.eq_ignore_ascii_case("mp4") || ext.eq_ignore_ascii_case("m4v") {
4526                fast_read_mp4_direct(py, &data, filename, &dict).unwrap_or(false)
4527            } else {
4528                if let Some(pf) = parse_and_serialize(&data, filename) {
4529                    preserialized_to_flat_dict(py, &pf, &dict).unwrap_or(());
4530                    true
4531                } else {
4532                    false
4533                }
4534            };
4535
4536            if ok {
4537                pyo3::ffi::PyList_Append(result_ptr, dict.as_ptr());
4538            }
4539        }
4540
4541        Ok(Bound::from_owned_ptr(py, result_ptr).unbind())
4542    }
4543}
4544
4545// ---- Module registration ----
4546
4547#[pymodule]
4548fn mutagen_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
4549    m.add_class::<PyMP3>()?;
4550    m.add_class::<PyMPEGInfo>()?;
4551    m.add_class::<PyID3>()?;
4552    m.add_class::<PyFLAC>()?;
4553    m.add_class::<PyStreamInfo>()?;
4554    m.add_class::<PyVComment>()?;
4555    m.add_class::<PyOggVorbis>()?;
4556    m.add_class::<PyOggVorbisInfo>()?;
4557    m.add_class::<PyMP4>()?;
4558    m.add_class::<PyMP4Info>()?;
4559    m.add_class::<PyMP4Tags>()?;
4560    m.add_class::<PyBatchResult>()?;
4561    m.add_class::<PyPOPM>()?;
4562
4563    m.add_function(wrap_pyfunction!(file_open, m)?)?;
4564    m.add_function(wrap_pyfunction!(batch_open, m)?)?;
4565    m.add_function(wrap_pyfunction!(batch_diag, m)?)?;
4566    m.add_function(wrap_pyfunction!(clear_cache, m)?)?;
4567    m.add_function(wrap_pyfunction!(clear_all_caches, m)?)?;
4568    m.add_function(wrap_pyfunction!(_rust_batch_open, m)?)?;
4569    m.add_function(wrap_pyfunction!(_fast_read, m)?)?;
4570    m.add_function(wrap_pyfunction!(_fast_info, m)?)?;
4571    m.add_function(wrap_pyfunction!(_fast_read_seq, m)?)?;
4572    m.add_function(wrap_pyfunction!(_fast_batch_read, m)?)?;
4573
4574    m.add("MutagenError", m.py().get_type::<common::error::MutagenPyError>())?;
4575    m.add("ID3Error", m.py().get_type::<common::error::ID3Error>())?;
4576    m.add("ID3NoHeaderError", m.py().get_type::<common::error::ID3NoHeaderError>())?;
4577    m.add("MP3Error", m.py().get_type::<common::error::MP3Error>())?;
4578    m.add("HeaderNotFoundError", m.py().get_type::<common::error::HeaderNotFoundError>())?;
4579    m.add("FLACError", m.py().get_type::<common::error::FLACError>())?;
4580    m.add("FLACNoHeaderError", m.py().get_type::<common::error::FLACNoHeaderError>())?;
4581    m.add("OggError", m.py().get_type::<common::error::OggError>())?;
4582    m.add("MP4Error", m.py().get_type::<common::error::MP4Error>())?;
4583
4584    m.add("File", wrap_pyfunction!(file_open, m)?)?;
4585
4586    Ok(())
4587}
4588} // mod python_bindings