pub mod common;
pub mod id3;
pub mod mp3;
pub mod flac;
pub mod ogg;
pub mod mp4;
pub mod vorbis;
#[cfg(feature = "python")]
use std::sync::{Arc, RwLock, OnceLock};
#[cfg(feature = "python")]
use std::collections::HashMap;
/// Process-wide cache of whole-file contents, keyed by the path string exactly
/// as it is passed to `read_cached`. The map is created lazily on first use
/// through `get_file_cache`; nothing in this part of the file bounds its size.
#[cfg(feature = "python")]
static FILE_CACHE: OnceLock<RwLock<HashMap<String, Arc<[u8]>>>> = OnceLock::new();
/// Returns the shared file cache, initialising it on the first call.
///
/// The backing map is pre-sized for 256 entries so that early inserts do not
/// trigger rehashing.
#[cfg(feature = "python")]
fn get_file_cache() -> &'static RwLock<HashMap<String, Arc<[u8]>>> {
    const INITIAL_CAPACITY: usize = 256;
    FILE_CACHE.get_or_init(|| {
        let entries = HashMap::with_capacity(INITIAL_CAPACITY);
        RwLock::new(entries)
    })
}
/// Returns the full contents of `path`, served from the shared cache when
/// possible.
///
/// On a miss the file is read *without* holding any lock, then stored under
/// the write lock. If another thread stored the same path in the meantime, its
/// buffer wins and the freshly read copy is dropped, so all callers share one
/// allocation per path.
///
/// # Errors
/// Propagates the I/O error from `fast_file_read`.
///
/// # Panics
/// Panics if the cache lock has been poisoned.
#[cfg(feature = "python")]
#[inline]
fn read_cached(path: &str) -> std::io::Result<Arc<[u8]>> {
    let cache = get_file_cache();

    // Fast path: shared lock only; the guard is released at the end of this statement.
    let hit = cache.read().unwrap().get(path).cloned();
    if let Some(bytes) = hit {
        return Ok(bytes);
    }

    // Slow path: do the disk read before taking the exclusive lock.
    let fresh: Arc<[u8]> = Arc::from(fast_file_read(path)?);

    let mut entries = cache.write().unwrap();
    let stored = entries.entry(path.to_string()).or_insert(fresh);
    Ok(Arc::clone(stored))
}
/// Reads the whole file at `path` into memory.
///
/// On Unix targets this dispatches to `fast_file_read_unix`; everywhere else it
/// is a plain `std::fs::read`.
#[cfg(feature = "python")]
#[inline]
fn fast_file_read(path: &str) -> std::io::Result<Vec<u8>> {
    #[cfg(unix)]
    let contents = fast_file_read_unix(path);
    #[cfg(not(unix))]
    let contents = std::fs::read(path);
    contents
}
/// Unix implementation of `fast_file_read`.
///
/// Opens `path` with a raw `open(2)` (adding `O_NOATIME` on Linux, with a
/// retry without it when that open fails), reads into a per-thread 256 KiB
/// scratch buffer, and returns an exactly-sized copy for files that fit in it.
/// Larger files are finished with `Read::read_to_end`.
///
/// Paths of 256 bytes or more, and paths containing an interior NUL byte, are
/// delegated to `std::fs::read` (which reports the NUL case as an error rather
/// than silently opening a truncated path).
///
/// # Errors
/// Returns the OS error from `open`/`read`. The descriptor is owned by a
/// `File`, so it is closed on every exit path and closing can no longer
/// overwrite `errno` before the error is captured.
#[cfg(all(feature = "python", unix))]
fn fast_file_read_unix(path: &str) -> std::io::Result<Vec<u8>> {
    use std::cell::RefCell;
    use std::fs::File;
    use std::io::{self, Read};
    use std::os::unix::io::FromRawFd;

    const MAX_STACK_PATH: usize = 256;
    const FAST_BUF_SIZE: usize = 256 * 1024;

    thread_local! {
        static FAST_BUF: RefCell<Vec<u8>> = RefCell::new(vec![0u8; FAST_BUF_SIZE]);
    }

    let path_bytes = path.as_bytes();
    if path_bytes.len() >= MAX_STACK_PATH || path_bytes.contains(&0) {
        return std::fs::read(path);
    }

    // NUL-terminated copy on the stack; the buffer is zero-initialised, so the
    // byte after the path is already the terminator.
    let mut c_buf = [0u8; MAX_STACK_PATH];
    c_buf[..path_bytes.len()].copy_from_slice(path_bytes);
    let c_path = c_buf.as_ptr() as *const libc::c_char;

    #[cfg(target_os = "linux")]
    let fd = {
        // SAFETY: `c_path` points at a NUL-terminated buffer that outlives the call.
        let quiet = unsafe {
            libc::open(c_path, libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOATIME)
        };
        if quiet >= 0 {
            quiet
        } else {
            // O_NOATIME is refused for files the caller does not own; retry plainly.
            // SAFETY: as above.
            unsafe { libc::open(c_path, libc::O_RDONLY | libc::O_CLOEXEC) }
        }
    };
    #[cfg(not(target_os = "linux"))]
    // SAFETY: `c_path` points at a NUL-terminated buffer that outlives the call.
    let fd = unsafe { libc::open(c_path, libc::O_RDONLY | libc::O_CLOEXEC) };

    if fd < 0 {
        return Err(io::Error::last_os_error());
    }

    // SAFETY: `fd` was just returned by a successful `open` and is owned by
    // nothing else; `File` takes ownership and closes it on drop.
    let mut file = unsafe { File::from_raw_fd(fd) };

    FAST_BUF.with(|cell| {
        let mut scratch = cell.borrow_mut();
        let mut filled = 0usize;

        // Fill the scratch buffer until EOF or until it is full. A short read
        // is not treated as EOF; only a zero-length read is.
        while filled < scratch.len() {
            match file.read(&mut scratch[filled..]) {
                Ok(0) => return Ok(scratch[..filled].to_vec()),
                Ok(n) => filled += n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }

        // The file is at least as large as the scratch buffer: keep what was
        // read and let the standard library pull the remainder, which loops
        // over short reads and surfaces read errors instead of dropping them.
        let mut out = Vec::with_capacity(filled);
        out.extend_from_slice(&scratch[..filled]);
        file.read_to_end(&mut out)?;
        Ok(out)
    })
}
#[cfg(feature = "python")]
mod python_bindings {
use super::*;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PyBytes, PyTuple};
use pyo3::exceptions::{PyValueError, PyKeyError, PyIOError};
// Python-visible `POPM` value. `frame_to_py` builds one of these from an ID3
// `Popularimeter` frame by copying its three fields; all fields are exposed to
// Python as read-only attributes.
#[pyclass(name = "POPM", skip_from_py_object)]
#[derive(Debug, Clone)]
struct PyPOPM {
// Email string carried by the frame.
#[pyo3(get)]
email: String,
// Rating byte carried by the frame.
#[pyo3(get)]
rating: u8,
// Counter carried by the frame.
#[pyo3(get)]
count: u64,
}
#[pymethods]
impl PyPOPM {
#[new]
fn new(email: String, rating: u8, count: u64) -> Self {
PyPOPM { email, rating, count }
}
fn __repr__(&self) -> String {
format!("POPM(email='{}', rating={}, count={})", self.email, self.rating, self.count)
}
fn __str__(&self) -> String {
self.__repr__()
}
}
// Python-visible `MPEGInfo`: a read-only snapshot of the stream properties of
// an MP3 file. Every field is copied from `mp3::MPEGInfo` by `make_mpeg_info`.
#[pyclass(name = "MPEGInfo", from_py_object)]
#[derive(Debug, Clone)]
struct PyMPEGInfo {
// Duration; printed with the unit "seconds" by `pprint`.
#[pyo3(get)]
length: f64,
#[pyo3(get)]
channels: u32,
// Printed with the unit "bps" by `pprint`.
#[pyo3(get)]
bitrate: u32,
// Printed with the unit "Hz" by `pprint`.
#[pyo3(get)]
sample_rate: u32,
#[pyo3(get)]
version: f64,
#[pyo3(get)]
layer: u8,
#[pyo3(get)]
mode: u32,
#[pyo3(get)]
protected: bool,
// Numeric encoding of `mp3::xing::BitrateMode` as assigned by
// `make_mpeg_info`: 0 = Unknown, 1 = CBR, 2 = VBR, 3 = ABR.
#[pyo3(get)]
bitrate_mode: u8,
#[pyo3(get)]
encoder_info: String,
#[pyo3(get)]
encoder_settings: String,
// The three gain/peak values are optional and surface as `None` when absent.
#[pyo3(get)]
track_gain: Option<f32>,
#[pyo3(get)]
track_peak: Option<f32>,
#[pyo3(get)]
album_gain: Option<f32>,
}
#[pymethods]
impl PyMPEGInfo {
    // Constructor-like summary of the main stream properties.
    fn __repr__(&self) -> String {
        let Self { length, bitrate, sample_rate, channels, version, layer, .. } = self;
        format!(
            "MPEGInfo(length={:.2}, bitrate={}, sample_rate={}, channels={}, version={}, layer={})",
            length, bitrate, sample_rate, channels, version, layer
        )
    }

    // One-line human-readable description of the stream.
    fn pprint(&self) -> String {
        let Self { version, layer, length, bitrate, sample_rate, .. } = self;
        format!(
            "MPEG {} layer {} {:.2} seconds, {} bps, {} Hz",
            version, layer, length, bitrate, sample_rate
        )
    }
}
// Python-visible `ID3` tag container.
#[pyclass(name = "ID3")]
#[derive(Debug)]
struct PyID3 {
// The parsed frames.
tags: id3::tags::ID3Tags,
// File the tag was loaded from, if any; used as the default target of
// `save` / `delete`.
path: Option<String>,
// Version pair taken from the file's ID3 header; `(4, 0)` when the
// object was created empty or the file had no header.
version: (u8, u8),
}
#[pymethods]
impl PyID3 {
    // Python constructor: `ID3()` creates an empty tag, `ID3(filename)` loads
    // the tag from that file. The version defaults to (4, 0) when the file
    // carries no ID3 header.
    #[new]
    #[pyo3(signature = (filename=None))]
    fn new(filename: Option<&str>) -> PyResult<Self> {
        match filename {
            Some(path) => {
                let (tags, header) = id3::load_id3(path)?;
                let version = header.as_ref().map(|h| h.version).unwrap_or((4, 0));
                Ok(PyID3 {
                    tags,
                    path: Some(path.to_string()),
                    version,
                })
            }
            None => Ok(PyID3 {
                tags: id3::tags::ID3Tags::new(),
                path: None,
                version: (4, 0),
            }),
        }
    }

    // Returns every frame stored under `key`, each converted with `frame_to_py`.
    fn getall(&self, key: &str) -> PyResult<Vec<Py<PyAny>>> {
        Python::attach(|py| {
            let frames = self.tags.getall(key);
            Ok(frames.iter().map(|f| frame_to_py(py, f)).collect())
        })
    }

    fn keys(&self) -> Vec<String> {
        self.tags.keys()
    }

    fn values(&self, py: Python) -> Vec<Py<PyAny>> {
        self.tags.values().iter().map(|f| frame_to_py(py, f)).collect()
    }

    // Raises `KeyError` when no frame is stored under `key`.
    fn __getitem__(&mut self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        match self.tags.get_mut(key) {
            Some(frame) => Ok(frame_to_py(py, frame)),
            None => Err(PyKeyError::new_err(key.to_string())),
        }
    }

    // Accepts either a list of strings or a single string and stores it as a
    // UTF-8 text frame, replacing any frames already stored under the same
    // hash key.
    fn __setitem__(&mut self, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
        let text = value.extract::<Vec<String>>().or_else(|_| {
            value.extract::<String>().map(|s| vec![s])
        })?;
        let frame = id3::frames::Frame::Text(id3::frames::TextFrame {
            id: key.to_string(),
            encoding: id3::specs::Encoding::Utf8,
            text,
        });
        let hash_key = frame.hash_key();
        if let Some((_, frames)) = self.tags.frames.iter_mut().find(|(k, _)| k == &hash_key) {
            *frames = vec![id3::tags::LazyFrame::Decoded(frame)];
        } else {
            self.tags.frames.push((hash_key, vec![id3::tags::LazyFrame::Decoded(frame)]));
        }
        Ok(())
    }

    fn __delitem__(&mut self, key: &str) -> PyResult<()> {
        self.tags.delall(key);
        Ok(())
    }

    fn __contains__(&self, key: &str) -> bool {
        self.tags.get(key).is_some()
    }

    fn __len__(&self) -> usize {
        self.tags.len()
    }

    fn __repr__(&self) -> String {
        format!("ID3(keys={})", self.tags.keys().join(", "))
    }

    // Iterates over a snapshot list of the current keys.
    fn __iter__(&self, py: Python) -> PyResult<Py<PyAny>> {
        let keys = self.tags.keys();
        let list = PyList::new(py, &keys)?;
        Ok(list.call_method0("__iter__")?.into())
    }

    // Writes the tag to `filename`, or to the path the tag was loaded from
    // when `filename` is omitted. The explicit signature makes the argument
    // optional from Python, matching the constructor; without it the
    // stored-path fallback below could only be reached by passing `None`
    // explicitly.
    //
    // Raises `ValueError` when neither a filename nor a stored path exists.
    // The on-disk version is never lower than 3.
    #[pyo3(signature = (filename=None))]
    fn save(&self, filename: Option<&str>) -> PyResult<()> {
        let path = filename
            .map(|s| s.to_string())
            .or_else(|| self.path.clone())
            .ok_or_else(|| PyValueError::new_err("No filename specified"))?;
        id3::save_id3(&path, &self.tags, self.version.0.max(3))?;
        // Drop any cached copy of the file so later reads see the new bytes.
        invalidate_file(&path);
        Ok(())
    }

    // Removes the ID3 tag from `filename`, or from the stored path when
    // `filename` is omitted. Raises `ValueError` when neither is available.
    #[pyo3(signature = (filename=None))]
    fn delete(&self, filename: Option<&str>) -> PyResult<()> {
        let path = filename
            .map(|s| s.to_string())
            .or_else(|| self.path.clone())
            .ok_or_else(|| PyValueError::new_err("No filename specified"))?;
        id3::delete_id3(&path)?;
        invalidate_file(&path);
        Ok(())
    }

    // One `ID=value` line per frame.
    fn pprint(&self) -> String {
        let mut parts = Vec::new();
        for frame in self.tags.values() {
            parts.push(format!("{}={}", frame.frame_id(), frame.pprint()));
        }
        parts.join("\n")
    }

    #[getter]
    fn version(&self) -> (u8, u8) {
        self.version
    }
}
// Python-visible `MP3` file object.
#[pyclass(name = "MP3")]
struct PyMP3 {
// Stream properties, exposed read-only to Python.
#[pyo3(get)]
info: PyMPEGInfo,
// Path the object was created from; also the target of `save` / `delete`.
#[pyo3(get)]
filename: String,
// Python dict serving `__getitem__` / `__contains__`; filled eagerly
// in `from_data` with the first frame of every key, converted to a Python
// object.
tag_dict: Py<PyDict>,
// Keys in the order they were added to `tag_dict`.
tag_keys: Vec<String>,
// The underlying tag data; this is what `save` writes out.
id3: PyID3,
}
impl PyMP3 {
    /// Builds a `PyMP3` from the already-loaded bytes of `filename`.
    ///
    /// Parses the file, forces the tags to be parsed, and pre-converts the
    /// first frame under every key into a Python object stored in `tag_dict`.
    /// Keys whose first frame fails to decode are skipped. The ID3 version
    /// falls back to `(4, 0)` when the file has no header.
    #[inline(always)]
    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
        let mut parsed = mp3::MP3File::parse(data, filename)?;
        parsed.ensure_tags_parsed(data);

        let info = make_mpeg_info(&parsed.info);
        let version = match parsed.id3_header.as_ref() {
            Some(header) => header.version,
            None => (4, 0),
        };

        let tag_dict = PyDict::new(py);
        let mut tag_keys = Vec::with_capacity(parsed.tags.frames.len());
        for (hash_key, frames) in parsed.tags.frames.iter_mut() {
            let Some(first) = frames.first_mut() else { continue };
            let Ok(frame) = first.decode_with_buf(&parsed.tags.raw_buf) else { continue };
            let name = hash_key.as_str();
            let _ = tag_dict.set_item(name, frame_to_py(py, frame));
            tag_keys.push(name.to_string());
        }

        let id3 = PyID3 {
            tags: parsed.tags,
            path: Some(filename.to_string()),
            version,
        };
        Ok(PyMP3 {
            info,
            filename: filename.to_string(),
            tag_dict: tag_dict.into(),
            tag_keys,
            id3,
        })
    }
}
#[pymethods]
impl PyMP3 {
    // Python constructor: loads `filename` through the shared file cache and
    // parses it. Read failures surface as `IOError`.
    #[new]
    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
        let bytes = read_cached(filename).map_err(|e| PyIOError::new_err(e.to_string()))?;
        Self::from_data(py, &bytes, filename)
    }

    // Returns a fresh `ID3` object holding a *copy* of the current tags.
    #[getter]
    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
        let PyID3 { tags, path, version } = &self.id3;
        let snapshot = PyID3 {
            tags: tags.clone(),
            path: path.clone(),
            version: *version,
        };
        Ok(snapshot.into_pyobject(py)?.into_any().unbind())
    }

    fn keys(&self) -> Vec<String> {
        self.tag_keys.clone()
    }

    // Looks the key up in the pre-built dict; raises `KeyError` when missing.
    #[inline(always)]
    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        self.tag_dict
            .bind(py)
            .get_item(key)?
            .map(|found| found.unbind())
            .ok_or_else(|| PyKeyError::new_err(key.to_string()))
    }

    // Accepts a list of strings or a single string. Updates the lookup dict
    // (always with a list), the key list, and the underlying tag set, where a
    // UTF-8 text frame replaces any frames with the same hash key.
    // NOTE(review): values loaded from disk are stored in the dict as a plain
    // string when the frame has exactly one text entry, whereas values set
    // here are always stored as a list — confirm whether that is intended.
    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
        let text = match value.extract::<Vec<String>>() {
            Ok(many) => many,
            Err(_) => vec![value.extract::<String>()?],
        };

        let _ = self.tag_dict.bind(py).set_item(key, PyList::new(py, &text)?);
        if !self.tag_keys.iter().any(|existing| existing == key) {
            self.tag_keys.push(key.to_string());
        }

        let frame = id3::frames::Frame::Text(id3::frames::TextFrame {
            id: key.to_string(),
            encoding: id3::specs::Encoding::Utf8,
            text,
        });
        let hash_key = frame.hash_key();
        let replacement = vec![id3::tags::LazyFrame::Decoded(frame)];
        match self.id3.tags.frames.iter_mut().find(|(k, _)| k == &hash_key) {
            Some((_, slot)) => *slot = replacement,
            None => self.id3.tags.frames.push((hash_key, replacement)),
        }
        Ok(())
    }

    // A failed dict lookup counts as "not contained".
    fn __contains__(&self, py: Python, key: &str) -> bool {
        matches!(self.tag_dict.bind(py).get_item(key), Ok(Some(_)))
    }

    fn __repr__(&self) -> String {
        format!("MP3(filename={:?})", self.filename)
    }

    // Writes the tags back to the file this object was created from.
    fn save(&self) -> PyResult<()> {
        let target = self.filename.as_str();
        self.id3.save(Some(target))
    }

    // Removes the ID3 tag from the file this object was created from.
    fn delete(&self) -> PyResult<()> {
        let target = self.filename.as_str();
        self.id3.delete(Some(target))
    }

    // No-op: always succeeds without changing anything.
    fn add_tags(&self) -> PyResult<()> {
        Ok(())
    }

    // Empties the in-memory tags, the key list and the lookup dict.
    fn clear(&mut self, py: Python) -> PyResult<()> {
        self.id3.tags.frames.clear();
        self.tag_keys.clear();
        self.tag_dict.bind(py).clear();
        Ok(())
    }

    // Stream description followed by one line per frame.
    fn pprint(&self) -> String {
        let stream = self.info.pprint();
        let frames = self.id3.pprint();
        format!("{}\n{}", stream, frames)
    }
}
// Python-visible `StreamInfo` for FLAC files. All fields are read-only
// attributes. `PyFLAC::from_data` copies every field except `bitrate` from the
// parsed file's `info`.
#[pyclass(name = "StreamInfo", from_py_object)]
#[derive(Debug, Clone)]
struct PyStreamInfo {
// Duration; printed with the unit "seconds" by `pprint`.
#[pyo3(get)]
length: f64,
#[pyo3(get)]
channels: u8,
// Printed with the unit "Hz" by `pprint`.
#[pyo3(get)]
sample_rate: u32,
#[pyo3(get)]
bits_per_sample: u8,
#[pyo3(get)]
total_samples: u64,
#[pyo3(get)]
min_block_size: u16,
#[pyo3(get)]
max_block_size: u16,
#[pyo3(get)]
min_frame_size: u32,
#[pyo3(get)]
max_frame_size: u32,
// Derived in `PyFLAC::from_data` as (bytes after the metadata) * 8 / length,
// or 0 when the length is not positive.
#[pyo3(get)]
bitrate: u32,
}
#[pymethods]
impl PyStreamInfo {
    // Constructor-like summary of the main stream properties.
    fn __repr__(&self) -> String {
        let Self { length, sample_rate, channels, bits_per_sample, .. } = self;
        format!(
            "StreamInfo(length={:.2}, sample_rate={}, channels={}, bits_per_sample={})",
            length, sample_rate, channels, bits_per_sample
        )
    }

    // One-line human-readable description of the stream.
    fn pprint(&self) -> String {
        let Self { length, sample_rate, .. } = self;
        format!(
            "FLAC, {:.2} seconds, {} Hz",
            length, sample_rate
        )
    }
}
// Python-visible `VComment`: a dict-like wrapper around a Vorbis comment
// block. Used as the `tags` object of the FLAC and Ogg Vorbis wrappers.
#[pyclass(name = "VComment", from_py_object)]
#[derive(Debug, Clone)]
struct PyVComment {
// The wrapped comment data (key/value lists plus the vendor string).
vc: vorbis::VorbisComment,
// File the comments belong to. Stored but not read by any method in this
// impl, hence the lint allowance.
#[allow(dead_code)]
path: Option<String>,
}
#[pymethods]
impl PyVComment {
    fn keys(&self) -> Vec<String> {
        self.vc.keys()
    }

    // Returns the values for `key` as a Python list; an empty result is
    // reported as `KeyError`.
    #[inline(always)]
    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        let found = self.vc.get(key);
        if found.is_empty() {
            Err(PyKeyError::new_err(key.to_string()))
        } else {
            Ok(PyList::new(py, found)?.into_any().unbind())
        }
    }

    // Accepts a list of strings or a single string.
    fn __setitem__(&mut self, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
        let values = match value.extract::<Vec<String>>() {
            Ok(many) => many,
            Err(_) => vec![value.extract::<String>()?],
        };
        self.vc.set(key, values);
        Ok(())
    }

    fn __delitem__(&mut self, key: &str) -> PyResult<()> {
        self.vc.delete(key);
        Ok(())
    }

    // A key counts as present only when it has at least one value.
    fn __contains__(&self, key: &str) -> bool {
        let found = self.vc.get(key);
        !found.is_empty()
    }

    fn __len__(&self) -> usize {
        self.vc.keys().len()
    }

    // Iterates over a snapshot list of the current keys.
    fn __iter__(&self, py: Python) -> PyResult<Py<PyAny>> {
        let snapshot = self.vc.keys();
        let iterator = PyList::new(py, &snapshot)?.call_method0("__iter__")?;
        Ok(iterator.unbind())
    }

    fn __repr__(&self) -> String {
        let joined = self.vc.keys().join(", ");
        format!("VComment(keys={})", joined)
    }

    #[getter]
    fn vendor(&self) -> &str {
        self.vc.vendor.as_str()
    }
}
// Python-visible `FLAC` file object.
#[pyclass(name = "FLAC")]
struct PyFLAC {
// Stream properties, exposed read-only to Python.
#[pyo3(get)]
info: PyStreamInfo,
// Path the object was created from.
#[pyo3(get)]
filename: String,
// The parsed file; `save` writes this back to disk.
flac_file: flac::FLACFile,
// Separate copy of the file's Vorbis comments (an empty block when the
// file has none); `__setitem__` / `clear` keep it in step with
// `flac_file.tags`, and the `tags` getter hands out clones of it.
vc_data: vorbis::VorbisComment,
// Python dict serving `__getitem__` / `__contains__`.
tag_dict: Py<PyDict>,
// Keys known to this object, in insertion order.
tag_keys: Vec<String>,
}
impl PyFLAC {
    /// Builds a `PyFLAC` from the already-loaded bytes of `filename`.
    ///
    /// The bitrate is derived from the number of bytes that follow the FLAC
    /// metadata divided by the stream length (0 when the length is not
    /// positive). Tags are copied into `vc_data` and mirrored into a Python
    /// dict; keys with no values are left out of the dict.
    #[inline(always)]
    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
        let mut flac_file = flac::FLACFile::parse(data, filename)?;

        let metadata_end = flac_file.flac_offset + flac_file.metadata_length;
        let audio_bytes = data.len().saturating_sub(metadata_end);
        let duration = flac_file.info.length;
        let bitrate = if duration > 0.0 {
            (audio_bytes as f64 * 8.0 / duration) as u32
        } else {
            0
        };

        let info = PyStreamInfo {
            length: duration,
            channels: flac_file.info.channels,
            sample_rate: flac_file.info.sample_rate,
            bits_per_sample: flac_file.info.bits_per_sample,
            total_samples: flac_file.info.total_samples,
            min_block_size: flac_file.info.min_block_size,
            max_block_size: flac_file.info.max_block_size,
            min_frame_size: flac_file.info.min_frame_size,
            max_frame_size: flac_file.info.max_frame_size,
            bitrate,
        };

        flac_file.ensure_tags();
        let vc_data = flac_file
            .tags
            .clone()
            .unwrap_or_else(vorbis::VorbisComment::new);

        let tag_dict = PyDict::new(py);
        let tag_keys = vc_data.keys();
        for key in &tag_keys {
            let values = vc_data.get(key);
            if values.is_empty() {
                continue;
            }
            let _ = tag_dict.set_item(key.as_str(), PyList::new(py, values)?);
        }

        Ok(PyFLAC {
            info,
            filename: filename.to_string(),
            flac_file,
            vc_data,
            tag_dict: tag_dict.into(),
            tag_keys,
        })
    }
}
#[pymethods]
impl PyFLAC {
    // Python constructor: loads `filename` through the shared file cache and
    // parses it. Read failures surface as `IOError`.
    #[new]
    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
        let data = read_cached(filename)
            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
        Self::from_data(py, &data, filename)
    }

    // Returns a fresh `VComment` holding a *copy* of the current comments.
    #[getter]
    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
        let vc = self.vc_data.clone();
        let pvc = PyVComment { vc, path: Some(self.filename.clone()) };
        Ok(pvc.into_pyobject(py)?.into_any().unbind())
    }

    fn keys(&self) -> Vec<String> {
        self.tag_keys.clone()
    }

    // Looks the key up in the pre-built dict; raises `KeyError` when missing.
    #[inline(always)]
    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        let dict = self.tag_dict.bind(py);
        match dict.get_item(key)? {
            Some(val) => Ok(val.unbind()),
            None => Err(PyKeyError::new_err(key.to_string())),
        }
    }

    // Accepts a list of strings or a single string and updates the lookup
    // dict, the key list, `vc_data`, and (when present) the tags on the parsed
    // file.
    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
        let values = value.extract::<Vec<String>>().or_else(|_| {
            value.extract::<String>().map(|s| vec![s])
        })?;
        let _ = self.tag_dict.bind(py).set_item(key, PyList::new(py, &values)?);
        if !self.tag_keys.contains(&key.to_string()) {
            self.tag_keys.push(key.to_string());
        }
        self.vc_data.set(key, values.clone());
        if let Some(ref mut tags) = self.flac_file.tags {
            tags.set(key, values);
        }
        Ok(())
    }

    // A failed dict lookup counts as "not contained".
    fn __contains__(&self, py: Python, key: &str) -> bool {
        self.tag_dict.bind(py).get_item(key).ok().flatten().is_some()
    }

    fn __repr__(&self) -> String {
        format!("FLAC(filename={:?})", self.filename)
    }

    // Writes the parsed file back to disk and drops its cached bytes.
    fn save(&self) -> PyResult<()> {
        self.flac_file.save()?;
        invalidate_file(&self.filename);
        Ok(())
    }

    // Returns the embedded pictures as a list of dicts with the keys
    // "type", "mime", "desc", "width", "height", "depth", "colors" and "data".
    // Lazily-referenced pictures come first, followed by the already-parsed
    // ones.
    //
    // The file is read from disk at most once per call (and not at all when
    // there are no lazy pictures). Lazy blocks whose byte range does not fit
    // inside the file, or that fail to parse, are skipped.
    #[getter]
    fn pictures(&self, py: Python) -> PyResult<Py<PyList>> {
        let mut pics = Vec::new();

        if !self.flac_file.lazy_pictures.is_empty() {
            let data = std::fs::read(&self.filename)
                .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
            for lp in &self.flac_file.lazy_pictures {
                // Overflow-safe bounds check: a wrapped end offset or an
                // out-of-range slice both yield `None`.
                let block = lp
                    .block_offset
                    .checked_add(lp.block_size)
                    .and_then(|end| data.get(lp.block_offset..end));
                let Some(block) = block else { continue };
                if let Ok(pic) = flac::FLACPicture::parse(block) {
                    let dict = PyDict::new(py);
                    let _ = dict.set_item("type", pic.pic_type);
                    let _ = dict.set_item("mime", &pic.mime);
                    let _ = dict.set_item("desc", &pic.desc);
                    let _ = dict.set_item("width", pic.width);
                    let _ = dict.set_item("height", pic.height);
                    let _ = dict.set_item("depth", pic.depth);
                    let _ = dict.set_item("colors", pic.colors);
                    let _ = dict.set_item("data", pyo3::types::PyBytes::new(py, &pic.data));
                    pics.push(dict.into_any().unbind());
                }
            }
        }

        for pic in &self.flac_file.pictures {
            let dict = PyDict::new(py);
            let _ = dict.set_item("type", pic.pic_type);
            let _ = dict.set_item("mime", &pic.mime);
            let _ = dict.set_item("desc", &pic.desc);
            let _ = dict.set_item("width", pic.width);
            let _ = dict.set_item("height", pic.height);
            let _ = dict.set_item("depth", pic.depth);
            let _ = dict.set_item("colors", pic.colors);
            let _ = dict.set_item("data", pyo3::types::PyBytes::new(py, &pic.data));
            pics.push(dict.into_any().unbind());
        }

        Ok(PyList::new(py, pics)?.unbind())
    }

    // Re-opens the file from disk, replaces its comments with an empty block,
    // drops all pictures, and saves. The in-memory state of this object is
    // left untouched.
    fn delete(&self) -> PyResult<()> {
        let mut flac_file = flac::FLACFile::open(&self.filename)
            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
        flac_file.tags = Some(vorbis::VorbisComment::new());
        flac_file.pictures.clear();
        flac_file.lazy_pictures.clear();
        flac_file.save()
            .map_err(|e| PyIOError::new_err(format!("{}", e)))?;
        invalidate_file(&self.filename);
        Ok(())
    }

    fn add_tags(&mut self) -> PyResult<()> {
        self.flac_file.ensure_tags();
        Ok(())
    }

    // Empties the comment copy, the key list, the lookup dict and (when
    // present) the tags on the parsed file.
    fn clear(&mut self, py: Python) -> PyResult<()> {
        self.vc_data = vorbis::VorbisComment::new();
        self.tag_keys.clear();
        let dict = self.tag_dict.bind(py);
        dict.clear();
        if let Some(ref mut tags) = self.flac_file.tags {
            *tags = vorbis::VorbisComment::new();
        }
        Ok(())
    }
}
// Python-visible `OggVorbisInfo`: read-only stream properties copied from the
// parsed Ogg Vorbis file's `info` in `PyOggVorbis::from_data`.
#[pyclass(name = "OggVorbisInfo", from_py_object)]
#[derive(Debug, Clone)]
struct PyOggVorbisInfo {
// Duration; printed with the unit "seconds" by `pprint`.
#[pyo3(get)]
length: f64,
#[pyo3(get)]
channels: u8,
// Printed with the unit "Hz" by `pprint`.
#[pyo3(get)]
sample_rate: u32,
#[pyo3(get)]
bitrate: u32,
}
#[pymethods]
impl PyOggVorbisInfo {
    // Constructor-like summary of the main stream properties.
    fn __repr__(&self) -> String {
        let Self { length, sample_rate, channels, .. } = self;
        format!(
            "OggVorbisInfo(length={:.2}, sample_rate={}, channels={})",
            length, sample_rate, channels
        )
    }

    // One-line human-readable description of the stream.
    fn pprint(&self) -> String {
        let Self { length, sample_rate, .. } = self;
        format!(
            "Ogg Vorbis, {:.2} seconds, {} Hz",
            length, sample_rate
        )
    }
}
// Python-visible `OggVorbis` file object.
#[pyclass(name = "OggVorbis")]
struct PyOggVorbis {
// Stream properties, exposed read-only to Python.
#[pyo3(get)]
info: PyOggVorbisInfo,
// Path the object was created from; `save` / `delete` re-read and rewrite it.
#[pyo3(get)]
filename: String,
// The comments; this is what `save` writes into the file.
vc: PyVComment,
// Python dict serving `__getitem__` / `__contains__`.
tag_dict: Py<PyDict>,
// Keys known to this object, in insertion order.
tag_keys: Vec<String>,
}
impl PyOggVorbis {
    /// Builds a `PyOggVorbis` from the already-loaded bytes of `filename`.
    ///
    /// Forces a full parse and tag load, copies the stream properties, and
    /// mirrors every key that has at least one value into a Python dict.
    #[inline(always)]
    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
        let mut parsed = ogg::OggVorbisFile::parse(data, filename)?;
        parsed.ensure_full_parse(data);
        parsed.ensure_tags();

        let info = PyOggVorbisInfo {
            length: parsed.info.length,
            channels: parsed.info.channels,
            sample_rate: parsed.info.sample_rate,
            bitrate: parsed.info.bitrate,
        };

        let tag_dict = PyDict::new(py);
        let tag_keys = parsed.tags.keys();
        for key in &tag_keys {
            let values = parsed.tags.get(key);
            if values.is_empty() {
                continue;
            }
            let _ = tag_dict.set_item(key.as_str(), PyList::new(py, values)?);
        }

        let owned_name = filename.to_string();
        Ok(PyOggVorbis {
            info,
            vc: PyVComment {
                vc: parsed.tags,
                path: Some(owned_name.clone()),
            },
            filename: owned_name,
            tag_dict: tag_dict.into(),
            tag_keys,
        })
    }
}
#[pymethods]
impl PyOggVorbis {
    // Python constructor: loads `filename` through the shared file cache and
    // parses it. Read failures surface as `IOError`.
    #[new]
    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
        let bytes = read_cached(filename).map_err(|e| PyIOError::new_err(e.to_string()))?;
        Self::from_data(py, &bytes, filename)
    }

    // Returns a fresh `VComment` holding a *copy* of the current comments.
    #[getter]
    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
        let snapshot = self.vc.clone();
        Ok(snapshot.into_pyobject(py)?.into_any().unbind())
    }

    fn keys(&self) -> Vec<String> {
        self.tag_keys.clone()
    }

    // Looks the key up in the pre-built dict; raises `KeyError` when missing.
    #[inline(always)]
    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        self.tag_dict
            .bind(py)
            .get_item(key)?
            .map(|found| found.unbind())
            .ok_or_else(|| PyKeyError::new_err(key.to_string()))
    }

    // Accepts a list of strings or a single string; updates the comments, the
    // lookup dict and the key list.
    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
        let values = match value.extract::<Vec<String>>() {
            Ok(many) => many,
            Err(_) => vec![value.extract::<String>()?],
        };
        self.vc.vc.set(key, values.clone());
        let _ = self.tag_dict.bind(py).set_item(key, PyList::new(py, &values)?);
        if !self.tag_keys.iter().any(|existing| existing == key) {
            self.tag_keys.push(key.to_string());
        }
        Ok(())
    }

    // A failed dict lookup counts as "not contained".
    fn __contains__(&self, py: Python, key: &str) -> bool {
        matches!(self.tag_dict.bind(py).get_item(key), Ok(Some(_)))
    }

    fn __repr__(&self) -> String {
        format!("OggVorbis(filename={:?})", self.filename)
    }

    // Re-parses the file, swaps in the current comments, writes it back and
    // drops the cached bytes. Read and write failures surface as `IOError`,
    // parse failures as `ValueError`.
    fn save(&self) -> PyResult<()> {
        let path = self.filename.as_str();
        let bytes = read_cached(path).map_err(|e| PyIOError::new_err(e.to_string()))?;
        let mut on_disk = ogg::OggVorbisFile::parse(&bytes, path)
            .map_err(|e| PyValueError::new_err(e.to_string()))?;
        on_disk.tags = self.vc.vc.clone();
        on_disk
            .save()
            .map_err(|e| PyIOError::new_err(e.to_string()))?;
        invalidate_file(path);
        Ok(())
    }

    // Same as `save`, but writes an empty comment block. The in-memory
    // comments of this object are left untouched.
    fn delete(&self) -> PyResult<()> {
        let path = self.filename.as_str();
        let bytes = read_cached(path).map_err(|e| PyIOError::new_err(e.to_string()))?;
        let mut on_disk = ogg::OggVorbisFile::parse(&bytes, path)
            .map_err(|e| PyValueError::new_err(e.to_string()))?;
        on_disk.tags = vorbis::VorbisComment::new();
        on_disk
            .save()
            .map_err(|e| PyIOError::new_err(e.to_string()))?;
        invalidate_file(path);
        Ok(())
    }

    // No-op: always succeeds without changing anything.
    fn add_tags(&self) -> PyResult<()> {
        Ok(())
    }

    // Empties the comments, the key list and the lookup dict.
    fn clear(&mut self, py: Python) -> PyResult<()> {
        self.vc.vc = vorbis::VorbisComment::new();
        self.tag_keys.clear();
        self.tag_dict.bind(py).clear();
        Ok(())
    }
}
// Python-visible `MP4Info`: read-only stream properties taken from the parsed
// MP4 file's `info` in `PyMP4::from_data`.
#[pyclass(name = "MP4Info", from_py_object)]
#[derive(Debug, Clone)]
struct PyMP4Info {
// Duration; printed with the unit "seconds" by `pprint`.
#[pyo3(get)]
length: f64,
#[pyo3(get)]
channels: u32,
#[pyo3(get)]
sample_rate: u32,
// Printed with the unit "bps" by `pprint`.
#[pyo3(get)]
bitrate: u32,
#[pyo3(get)]
bits_per_sample: u32,
// Codec identifier; shown in parentheses by `pprint`.
#[pyo3(get)]
codec: String,
#[pyo3(get)]
codec_description: String,
}
#[pymethods]
impl PyMP4Info {
    // Constructor-like summary of the main stream properties.
    fn __repr__(&self) -> String {
        let Self { length, codec, channels, sample_rate, .. } = self;
        format!(
            "MP4Info(length={:.2}, codec={}, channels={}, sample_rate={})",
            length, codec, channels, sample_rate
        )
    }

    // One-line human-readable description of the stream.
    fn pprint(&self) -> String {
        let Self { codec, length, bitrate, .. } = self;
        format!(
            "MPEG-4 audio ({}), {:.2} seconds, {} bps",
            codec, length, bitrate
        )
    }
}
// Python-visible `MP4Tags`: a read-only, dict-like view over a set of MP4
// tags (the methods on this type only look values up; none of them mutate).
#[pyclass(name = "MP4Tags", from_py_object)]
#[derive(Debug, Clone)]
struct PyMP4Tags {
// The wrapped tag collection.
tags: mp4::MP4Tags,
}
#[pymethods]
impl PyMP4Tags {
    fn keys(&self) -> Vec<String> {
        self.tags.keys()
    }

    // Converts the stored value with `mp4_value_to_py`; raises `KeyError`
    // when the key is absent.
    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        let Some(value) = self.tags.get(key) else {
            return Err(PyKeyError::new_err(key.to_string()));
        };
        mp4_value_to_py(py, value)
    }

    fn __contains__(&self, key: &str) -> bool {
        self.tags.contains_key(key)
    }

    fn __len__(&self) -> usize {
        self.tags.items.len()
    }

    // Iterates over a snapshot list of the current keys.
    fn __iter__(&self, py: Python) -> PyResult<Py<PyAny>> {
        let snapshot = self.tags.keys();
        let iterator = PyList::new(py, &snapshot)?.call_method0("__iter__")?;
        Ok(iterator.unbind())
    }

    fn __repr__(&self) -> String {
        let joined = self.tags.keys().join(", ");
        format!("MP4Tags(keys={})", joined)
    }
}
// Python-visible `MP4` file object.
#[pyclass(name = "MP4")]
struct PyMP4 {
// Stream properties, exposed read-only to Python.
#[pyo3(get)]
info: PyMP4Info,
// Path the object was created from; also the target of `save` / `delete`.
#[pyo3(get)]
filename: String,
// The tags; this is what `save` writes into the file.
mp4_tags: PyMP4Tags,
// Python dict serving `__getitem__` / `__contains__`, holding each tag
// value already converted to a Python object.
tag_dict: Py<PyDict>,
// Keys known to this object, in insertion order.
tag_keys: Vec<String>,
}
impl PyMP4 {
    /// Builds a `PyMP4` from the already-loaded bytes of `filename`.
    ///
    /// Forces a full parse, takes the stream properties, and mirrors every
    /// tag whose value converts successfully into a Python dict; values that
    /// fail to convert are left out of the dict but stay in the tag set.
    #[inline(always)]
    fn from_data(py: Python<'_>, data: &[u8], filename: &str) -> PyResult<Self> {
        let mut parsed = mp4::MP4File::parse(data, filename)?;
        parsed.ensure_parsed_with_data(data);

        let info = PyMP4Info {
            length: parsed.info.length,
            channels: parsed.info.channels,
            sample_rate: parsed.info.sample_rate,
            bitrate: parsed.info.bitrate,
            bits_per_sample: parsed.info.bits_per_sample,
            codec: parsed.info.codec,
            codec_description: parsed.info.codec_description,
        };

        let tag_dict = PyDict::new(py);
        let tag_keys = parsed.tags.keys();
        for key in &tag_keys {
            let Some(value) = parsed.tags.get(key) else { continue };
            let Ok(converted) = mp4_value_to_py(py, value) else { continue };
            let _ = tag_dict.set_item(key.as_str(), converted);
        }

        Ok(PyMP4 {
            info,
            filename: filename.to_string(),
            mp4_tags: PyMP4Tags { tags: parsed.tags },
            tag_dict: tag_dict.into(),
            tag_keys,
        })
    }
}
#[pymethods]
impl PyMP4 {
    // Python constructor: loads `filename` through the shared file cache and
    // parses it. Read failures surface as `IOError`.
    #[new]
    fn new(py: Python<'_>, filename: &str) -> PyResult<Self> {
        let bytes = read_cached(filename).map_err(|e| PyIOError::new_err(e.to_string()))?;
        Self::from_data(py, &bytes, filename)
    }

    // Returns a fresh `MP4Tags` holding a *copy* of the current tags.
    #[getter]
    fn tags(&self, py: Python) -> PyResult<Py<PyAny>> {
        let snapshot = self.mp4_tags.clone();
        Ok(snapshot.into_pyobject(py)?.into_any().unbind())
    }

    fn keys(&self) -> Vec<String> {
        self.tag_keys.clone()
    }

    // Looks the key up in the pre-built dict; raises `KeyError` when missing.
    #[inline(always)]
    fn __getitem__(&self, py: Python, key: &str) -> PyResult<Py<PyAny>> {
        self.tag_dict
            .bind(py)
            .get_item(key)?
            .map(|found| found.unbind())
            .ok_or_else(|| PyKeyError::new_err(key.to_string()))
    }

    // Converts the Python value to a tag value, then stores the round-tripped
    // Python form in the lookup dict so reads see the normalised value.
    fn __setitem__(&mut self, py: Python, key: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
        let tag_value = py_to_mp4_value(key, value)?;
        let normalised = mp4_value_to_py(py, &tag_value)?;
        let _ = self.tag_dict.bind(py).set_item(key, normalised);
        if !self.tag_keys.iter().any(|existing| existing == key) {
            self.tag_keys.push(key.to_string());
        }
        self.mp4_tags.tags.set(key, tag_value);
        Ok(())
    }

    // Raises `KeyError` when the key is not present; otherwise removes it from
    // the lookup dict, the key list and the tag set.
    fn __delitem__(&mut self, py: Python, key: &str) -> PyResult<()> {
        let dict = self.tag_dict.bind(py);
        match dict.get_item(key)? {
            None => return Err(PyKeyError::new_err(key.to_string())),
            Some(_) => dict.del_item(key)?,
        }
        self.tag_keys.retain(|existing| existing != key);
        self.mp4_tags.tags.delete(key);
        Ok(())
    }

    // A failed dict lookup counts as "not contained".
    fn __contains__(&self, py: Python, key: &str) -> bool {
        matches!(self.tag_dict.bind(py).get_item(key), Ok(Some(_)))
    }

    // Writes the current tags to the file and drops its cached bytes.
    fn save(&self) -> PyResult<()> {
        let path = self.filename.as_str();
        mp4::save_mp4_tags(path, &self.mp4_tags.tags)
            .map_err(|e| PyIOError::new_err(e.to_string()))?;
        invalidate_file(path);
        Ok(())
    }

    // Writes an empty tag set to the file. The in-memory tags of this object
    // are left untouched.
    fn delete(&self) -> PyResult<()> {
        let path = self.filename.as_str();
        let no_tags = mp4::MP4Tags::new();
        mp4::save_mp4_tags(path, &no_tags)
            .map_err(|e| PyIOError::new_err(e.to_string()))?;
        invalidate_file(path);
        Ok(())
    }

    fn __repr__(&self) -> String {
        format!("MP4(filename={:?})", self.filename)
    }

    // No-op: always succeeds without changing anything.
    fn add_tags(&self) -> PyResult<()> {
        Ok(())
    }

    // Empties the tag set, the key list and the lookup dict.
    fn clear(&mut self, py: Python) -> PyResult<()> {
        self.mp4_tags.tags.items.clear();
        self.tag_keys.clear();
        self.tag_dict.bind(py).clear();
        Ok(())
    }
}
/// Copies an `mp3::MPEGInfo` into its Python-facing counterpart.
///
/// The bitrate mode enum is flattened to a small integer:
/// 0 = Unknown, 1 = CBR, 2 = VBR, 3 = ABR.
#[inline(always)]
fn make_mpeg_info(info: &mp3::MPEGInfo) -> PyMPEGInfo {
    let bitrate_mode = match info.bitrate_mode {
        mp3::xing::BitrateMode::Unknown => 0,
        mp3::xing::BitrateMode::CBR => 1,
        mp3::xing::BitrateMode::VBR => 2,
        mp3::xing::BitrateMode::ABR => 3,
    };
    let encoder_info = info.encoder_info.clone();
    let encoder_settings = info.encoder_settings.clone();

    PyMPEGInfo {
        length: info.length,
        channels: info.channels,
        bitrate: info.bitrate,
        sample_rate: info.sample_rate,
        version: info.version,
        layer: info.layer,
        mode: info.mode,
        protected: info.protected,
        bitrate_mode,
        encoder_info,
        encoder_settings,
        track_gain: info.track_gain,
        track_peak: info.track_peak,
        album_gain: info.album_gain,
    }
}
#[inline(always)]
fn frame_to_py(py: Python, frame: &id3::frames::Frame) -> Py<PyAny> {
match frame {
id3::frames::Frame::Text(f) => {
if f.text.len() == 1 {
f.text[0].as_str().into_pyobject(py).unwrap().into_any().unbind()
} else {
let list = PyList::new(py, &f.text).unwrap();
list.into_any().unbind()
}
}
id3::frames::Frame::UserText(f) => {
if f.text.len() == 1 {
f.text[0].as_str().into_pyobject(py).unwrap().into_any().unbind()
} else {
let list = PyList::new(py, &f.text).unwrap();
list.into_any().unbind()
}
}
id3::frames::Frame::Url(f) => {
f.url.as_str().into_pyobject(py).unwrap().into_any().unbind()
}
id3::frames::Frame::UserUrl(f) => {
f.url.as_str().into_pyobject(py).unwrap().into_any().unbind()
}
id3::frames::Frame::Comment(f) => {
f.text.as_str().into_pyobject(py).unwrap().into_any().unbind()
}
id3::frames::Frame::Lyrics(f) => {
f.text.as_str().into_pyobject(py).unwrap().into_any().unbind()
}
id3::frames::Frame::Picture(f) => {
let dict = PyDict::new(py);
dict.set_item("mime", &f.mime).unwrap();
dict.set_item("type", f.pic_type as u8).unwrap();
dict.set_item("desc", &f.desc).unwrap();
dict.set_item("data", PyBytes::new(py, &f.data)).unwrap();
dict.into_any().unbind()
}
id3::frames::Frame::Popularimeter(f) => {
Py::new(py, PyPOPM {
email: f.email.clone(),
rating: f.rating,
count: f.count,
}).unwrap().into_any()
}
id3::frames::Frame::Binary(f) => {
PyBytes::new(py, &f.data).into_any().unbind()
}
id3::frames::Frame::PairedText(f) => {
let pairs: Vec<(&str, &str)> = f.people.iter().map(|(a, b)| (a.as_str(), b.as_str())).collect();
let list = PyList::new(py, &pairs).unwrap();
list.into_any().unbind()
}
}
}
/// Converts an MP4 tag value into the Python object handed to callers.
///
/// * `Text` / `Integer`: the bare element when there is exactly one,
///   otherwise a list.
/// * `IntPair`: a 2-tuple when there is exactly one pair, otherwise a list of
///   2-tuples.
/// * `Bool`: a Python `bool`.
/// * `Cover`: a list of dicts with the keys "data" (`bytes`) and "format".
/// * `FreeForm`: a list of `bytes`.
/// * `Data`: `bytes`.
///
/// # Errors
/// Propagates any Python error raised while building the objects.
#[inline(always)]
fn mp4_value_to_py(py: Python, value: &mp4::MP4TagValue) -> PyResult<Py<PyAny>> {
    let converted = match value {
        mp4::MP4TagValue::Text(v) => match v.as_slice() {
            [only] => only.as_str().into_pyobject(py)?.into_any().unbind(),
            _ => PyList::new(py, v)?.into_any().unbind(),
        },
        mp4::MP4TagValue::Integer(v) => match v.as_slice() {
            [only] => (*only).into_pyobject(py)?.into_any().unbind(),
            _ => PyList::new(py, v)?.into_any().unbind(),
        },
        mp4::MP4TagValue::IntPair(v) => {
            if let [(a, b)] = v.as_slice() {
                PyTuple::new(py, &[*a, *b])?.into_any().unbind()
            } else {
                let tuples = PyList::empty(py);
                for (a, b) in v.iter() {
                    tuples.append(PyTuple::new(py, &[*a, *b])?)?;
                }
                tuples.into_any().unbind()
            }
        }
        mp4::MP4TagValue::Bool(v) => (*v).into_pyobject(py)?.to_owned().into_any().unbind(),
        mp4::MP4TagValue::Cover(covers) => {
            let entries = PyList::empty(py);
            for cover in covers {
                let entry = PyDict::new(py);
                entry.set_item("data", PyBytes::new(py, &cover.data))?;
                entry.set_item("format", cover.format as u8)?;
                entries.append(entry)?;
            }
            entries.into_any().unbind()
        }
        mp4::MP4TagValue::FreeForm(forms) => {
            let blobs = PyList::empty(py);
            for form in forms {
                blobs.append(PyBytes::new(py, &form.data))?;
            }
            blobs.into_any().unbind()
        }
        mp4::MP4TagValue::Data(d) => PyBytes::new(py, d).into_any().unbind(),
    };
    Ok(converted)
}
fn py_to_mp4_value(key: &str, value: &Bound<'_, PyAny>) -> PyResult<mp4::MP4TagValue> {
if key == "covr" {
if let Ok(list) = value.cast::<PyList>() {
let mut covers = Vec::new();
for item in list.iter() {
if let Ok(data) = item.extract::<Vec<u8>>() {
let fmt = if data.starts_with(b"\x89PNG") {
mp4::MP4CoverFormat::PNG
} else {
mp4::MP4CoverFormat::JPEG
};
covers.push(mp4::MP4Cover { data, format: fmt });
} else if let Ok(dict) = item.cast::<PyDict>() {
if let (Some(data_obj), Some(fmt_obj)) = (dict.get_item("data")?, dict.get_item("format")?) {
let data = data_obj.extract::<Vec<u8>>()?;
let fmt_int = fmt_obj.extract::<u32>().unwrap_or(13);
let format = if fmt_int == 14 { mp4::MP4CoverFormat::PNG } else { mp4::MP4CoverFormat::JPEG };
covers.push(mp4::MP4Cover { data, format });
}
}
}
if !covers.is_empty() {
return Ok(mp4::MP4TagValue::Cover(covers));
}
}
if let Ok(data) = value.extract::<Vec<u8>>() {
let fmt = if data.starts_with(b"\x89PNG") {
mp4::MP4CoverFormat::PNG
} else {
mp4::MP4CoverFormat::JPEG
};
return Ok(mp4::MP4TagValue::Cover(vec![mp4::MP4Cover { data, format: fmt }]));
}
}
if key == "trkn" || key == "disk" {
if let Ok(pairs) = value.extract::<Vec<(i32, i32)>>() {
return Ok(mp4::MP4TagValue::IntPair(pairs));
}
if let Ok(pair) = value.extract::<(i32, i32)>() {
return Ok(mp4::MP4TagValue::IntPair(vec![pair]));
}
}
if let Ok(strings) = value.extract::<Vec<String>>() {
return Ok(mp4::MP4TagValue::Text(strings));
}
if let Ok(s) = value.extract::<String>() {
return Ok(mp4::MP4TagValue::Text(vec![s]));
}
if let Ok(b) = value.extract::<bool>() {
return Ok(mp4::MP4TagValue::Bool(b));
}
if let Ok(i) = value.extract::<i64>() {
return Ok(mp4::MP4TagValue::Integer(vec![i]));
}
if let Ok(ints) = value.extract::<Vec<i64>>() {
return Ok(mp4::MP4TagValue::Integer(ints));
}
if let Ok(data) = value.extract::<Vec<u8>>() {
return Ok(mp4::MP4TagValue::Data(data));
}
if let Ok(list) = value.cast::<PyList>() {
let mut forms = Vec::new();
for item in list.iter() {
if let Ok(data) = item.extract::<Vec<u8>>() {
forms.push(mp4::MP4FreeForm { data, dataformat: 1 });
}
}
if !forms.is_empty() {
return Ok(mp4::MP4TagValue::FreeForm(forms));
}
}
Err(PyValueError::new_err(format!(
"Cannot convert value for MP4 key '{}': unsupported type", key
)))
}
/// Tag value in a plain-Rust form that holds no Python objects.
///
/// `frame_to_batch_value` produces the `Text`, `TextList`, `Bytes`, `Picture`,
/// `Popularimeter` and `PairedText` variants from ID3 frames; the remaining
/// variants are not constructed in the part of the file shown here.
#[derive(Clone)]
enum BatchTagValue {
/// A single string.
Text(String),
/// Several strings (also used when a text frame has zero entries).
TextList(Vec<String>),
/// Raw bytes.
Bytes(Vec<u8>),
Int(i64),
IntPair(i32, i32),
Bool(bool),
/// Embedded picture: MIME type, picture-type code, description and image bytes.
Picture { mime: String, pic_type: u8, desc: String, data: Vec<u8> },
/// Popularimeter data: email, rating and counter.
Popularimeter { email: String, rating: u8, count: u64 },
/// List of string pairs.
PairedText(Vec<(String, String)>),
// NOTE(review): presumably (image bytes, format code) per cover — confirm against the producer.
CoverList(Vec<(Vec<u8>, u8)>),
FreeFormList(Vec<Vec<u8>>),
}
/// Parsed stream information and tags for one audio file, held in plain Rust
/// data so it can be built inside `py.detach` closures and converted to Python
/// objects afterwards.
#[derive(Clone)]
struct PreSerializedFile {
/// Duration; the Ogg parser computes it as last granule / sample rate,
/// i.e. seconds — presumably the same unit for the other formats (TODO confirm).
length: f64,
/// Sample rate as reported by the format parser.
sample_rate: u32,
/// Channel count.
channels: u32,
/// Bitrate, when the parser could determine or estimate one.
bitrate: Option<u32>,
/// Eagerly parsed tags (MP3 / MP4); left empty by the FLAC and Ogg parsers.
tags: Vec<(String, BatchTagValue)>,
/// Format-specific stream attributes (e.g. `bits_per_sample`, `codec`).
/// In the visible part of the file only `_fast_batch_read` emits these.
extra: Vec<(&'static str, BatchTagValue)>,
/// Raw Vorbis comment bytes (FLAC / Ogg), decoded lazily at conversion time.
lazy_vc: Option<Vec<u8>>,
}
/// Converts a decoded ID3 frame into its owned batch representation.
///
/// Text-like frames holding exactly one string become `BatchTagValue::Text`;
/// any other count (including zero) stays a `TextList`.
#[inline(always)]
fn frame_to_batch_value(frame: &id3::frames::Frame) -> BatchTagValue {
    // Shared by `Text` and `UserText`: a lone string is surfaced as a scalar.
    fn collapse_text(text: &[String]) -> BatchTagValue {
        match text {
            [only] => BatchTagValue::Text(only.clone()),
            _ => BatchTagValue::TextList(text.to_vec()),
        }
    }

    match frame {
        id3::frames::Frame::Text(t) => collapse_text(&t.text),
        id3::frames::Frame::UserText(t) => collapse_text(&t.text),
        id3::frames::Frame::Url(u) => BatchTagValue::Text(u.url.clone()),
        id3::frames::Frame::UserUrl(u) => BatchTagValue::Text(u.url.clone()),
        id3::frames::Frame::Comment(c) => BatchTagValue::Text(c.text.clone()),
        id3::frames::Frame::Lyrics(l) => BatchTagValue::Text(l.text.clone()),
        id3::frames::Frame::Picture(pic) => {
            let mime = pic.mime.clone();
            let desc = pic.desc.clone();
            let data = pic.data.clone();
            BatchTagValue::Picture { mime, pic_type: pic.pic_type as u8, desc, data }
        }
        id3::frames::Frame::Popularimeter(popm) => BatchTagValue::Popularimeter {
            email: popm.email.clone(),
            rating: popm.rating,
            count: popm.count,
        },
        id3::frames::Frame::Binary(bin) => BatchTagValue::Bytes(bin.data.clone()),
        id3::frames::Frame::PairedText(pt) => BatchTagValue::PairedText(pt.people.clone()),
    }
}
/// Parses a raw Vorbis comment block (without framing) into batch tags.
///
/// Layout: `u32le vendor_len`, vendor string, `u32le count`, then `count`
/// entries of `u32le len` + `KEY=value`. Parsing is best-effort: a truncated
/// block yields the entries read so far, entries without `=` are skipped.
///
/// Keys are ASCII-lowercased; entries whose key is not valid UTF-8 are skipped
/// regardless of letter case (previously mixed-case keys were decoded byte by
/// byte as Latin-1, so the same key could come out differently depending on
/// its case). Values are decoded as UTF-8 with lossy replacement. Every value
/// is stored as a `TextList`, repeated keys append to the existing list.
#[inline(always)]
fn parse_vc_to_batch_tags(data: &[u8]) -> Vec<(String, BatchTagValue)> {
    // Reads a little-endian u32 at `*pos` and advances; `None` when out of bounds.
    fn take_len(data: &[u8], pos: &mut usize) -> Option<usize> {
        let end = pos.checked_add(4)?;
        let raw = data.get(*pos..end)?;
        *pos = end;
        Some(u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]) as usize)
    }
    // Borrows `len` bytes at `*pos` and advances; overflow-safe on 32-bit targets.
    fn take_bytes<'a>(data: &'a [u8], pos: &mut usize, len: usize) -> Option<&'a [u8]> {
        let end = pos.checked_add(len)?;
        let raw = data.get(*pos..end)?;
        *pos = end;
        Some(raw)
    }

    let mut tags: Vec<(String, BatchTagValue)> = Vec::new();
    if data.len() < 8 {
        return tags;
    }
    let mut pos = 0usize;
    let Some(vendor_len) = take_len(data, &mut pos) else { return tags };
    if take_bytes(data, &mut pos, vendor_len).is_none() {
        return tags;
    }
    let Some(count) = take_len(data, &mut pos) else { return tags };
    // `count` is untrusted, so cap the up-front reservation.
    tags.reserve(count.min(64));

    for _ in 0..count {
        let Some(comment_len) = take_len(data, &mut pos) else { break };
        let Some(raw) = take_bytes(data, &mut pos, comment_len) else { break };
        let Some(eq_pos) = memchr::memchr(b'=', raw) else { continue };
        let key_bytes = &raw[..eq_pos];
        let value_bytes = &raw[eq_pos + 1..];

        let Ok(key_str) = std::str::from_utf8(key_bytes) else { continue };
        let key = key_str.to_ascii_lowercase();
        let value = String::from_utf8_lossy(value_bytes).into_owned();

        match tags.iter_mut().find(|(k, _)| k == &key) {
            Some((_, BatchTagValue::TextList(values))) => values.push(value),
            // Only `TextList` entries are ever inserted here; nothing to do otherwise.
            Some(_) => {}
            None => tags.push((key, BatchTagValue::TextList(vec![value]))),
        }
    }
    tags
}
/// Extracts stream info and the raw Vorbis comment block from a FLAC buffer.
///
/// `data` may be only a prefix of the file; `file_size` is the full size and is
/// used for the bitrate estimate. A leading ID3v2 tag is skipped. Metadata
/// blocks are walked until the last block, a truncated block, or until both
/// STREAMINFO and a Vorbis comment block have been seen. Returns `None` when
/// no FLAC marker or no usable STREAMINFO (sample rate 0) is found.
#[inline(always)]
fn parse_flac_batch(data: &[u8], file_size: usize) -> Option<PreSerializedFile> {
    let stream_start = if data.starts_with(b"fLaC") {
        0
    } else if data.len() >= 10 && data.starts_with(b"ID3") {
        let tag_size = crate::id3::header::BitPaddedInt::syncsafe(&data[6..10]) as usize;
        let after_tag = 10 + tag_size;
        if data.get(after_tag..after_tag + 4) != Some(&b"fLaC"[..]) {
            return None;
        }
        after_tag
    } else {
        return None;
    };

    let mut cursor = stream_start + 4;
    let mut sample_rate = 0u32;
    let mut channels = 0u8;
    let mut length = 0.0f64;
    let mut bits_per_sample = 0u8;
    let mut total_samples = 0u64;
    let mut vc_span: Option<(usize, usize)> = None;

    while cursor + 4 <= data.len() {
        let flags = data[cursor];
        let last_block = flags & 0x80 != 0;
        let block_type = flags & 0x7F;
        // 24-bit big-endian block length.
        let block_len = (usize::from(data[cursor + 1]) << 16)
            | (usize::from(data[cursor + 2]) << 8)
            | usize::from(data[cursor + 3]);
        cursor += 4;
        if cursor + block_len > data.len() {
            break;
        }
        if block_type == 0 {
            if let Ok(si) = flac::StreamInfo::parse(&data[cursor..cursor + block_len]) {
                sample_rate = si.sample_rate;
                channels = si.channels;
                length = si.length;
                bits_per_sample = si.bits_per_sample;
                total_samples = si.total_samples;
            }
        } else if block_type == 4 {
            let vc_len = flac::compute_vc_data_size(&data[cursor..]).unwrap_or(block_len);
            vc_span = Some((cursor, vc_len));
        }
        cursor += block_len;
        if last_block || (sample_rate > 0 && vc_span.is_some()) {
            break;
        }
    }

    if sample_rate == 0 {
        return None;
    }

    let lazy_vc = vc_span.map(|(start, len)| {
        let end = start.saturating_add(len).min(data.len());
        data[start..end].to_vec()
    });
    // Everything after the last visited metadata block is counted as audio.
    let audio_bytes = file_size.saturating_sub(cursor);
    let bitrate = (length > 0.0).then(|| (audio_bytes as f64 * 8.0 / length) as u32);

    let extra = vec![
        ("bits_per_sample", BatchTagValue::Int(bits_per_sample as i64)),
        ("total_samples", BatchTagValue::Int(total_samples as i64)),
    ];
    Some(PreSerializedFile {
        length,
        sample_rate,
        channels: channels as u32,
        bitrate,
        tags: Vec::new(),
        extra,
        lazy_vc,
    })
}
/// Extracts stream info and the raw Vorbis comment payload from an Ogg Vorbis buffer.
///
/// Reads the identification header from the first page, then the comment
/// packet starting on the second page (reassembled via
/// `ogg::ogg_assemble_first_packet` when it spans pages). Returns `None` for
/// anything that does not look like Ogg Vorbis or is truncated.
#[inline(always)]
fn parse_ogg_batch(data: &[u8]) -> Option<PreSerializedFile> {
    let le_u32 = |buf: &[u8], at: usize| {
        u32::from_le_bytes([buf[at], buf[at + 1], buf[at + 2], buf[at + 3]])
    };

    if data.len() < 58 || !data.starts_with(b"OggS") {
        return None;
    }
    let serial = le_u32(data, 14);
    let page1_segments = usize::from(data[26]);
    let page1_payload = 27 + page1_segments;
    if page1_payload > data.len() {
        return None;
    }
    let page1_len: usize = data[27..page1_payload].iter().map(|&s| usize::from(s)).sum();
    let page2 = page1_payload + page1_len;
    if page1_payload + 30 > data.len() {
        return None;
    }

    // Identification header: "\x01vorbis", version, channels, rate, bitrates.
    let ident = &data[page1_payload..];
    if ident.len() < 30 || !ident.starts_with(b"\x01vorbis") {
        return None;
    }
    let channels = ident[11];
    let sample_rate = le_u32(ident, 12);
    let nominal_bitrate = le_u32(ident, 20);

    if page2 + 27 > data.len() || &data[page2..page2 + 4] != b"OggS" {
        return None;
    }
    let page2_segments = usize::from(data[page2 + 26]);
    let lacing_start = page2 + 27;
    let lacing_end = lacing_start + page2_segments;
    if lacing_end > data.len() {
        return None;
    }
    let lacing = &data[lacing_start..lacing_end];

    // A lacing value below 255 terminates the packet within this page.
    let terminator = lacing.iter().position(|&seg| seg < 255);
    let single_page = terminator.is_some();
    let counted = terminator.map_or(lacing.len(), |idx| idx + 1);
    let first_packet_size: usize = lacing[..counted].iter().map(|&seg| usize::from(seg)).sum();

    let length = ogg::find_last_granule(data, serial)
        .map(|g| if g > 0 && sample_rate > 0 { g as f64 / sample_rate as f64 } else { 0.0 })
        .unwrap_or(0.0);

    let bitrate = if nominal_bitrate > 0 {
        Some(nominal_bitrate)
    } else if length > 0.0 {
        Some((data.len() as f64 * 8.0 / length) as u32)
    } else {
        None
    };

    let comment_payload = if single_page {
        let start = lacing_end;
        let end = start + first_packet_size;
        if end > data.len()
            || first_packet_size < 7
            || &data[start..start + 7] != b"\x03vorbis"
        {
            return None;
        }
        data[start + 7..end].to_vec()
    } else {
        let packet = ogg::ogg_assemble_first_packet(data, page2)?;
        if packet.len() < 7 || &packet[0..7] != b"\x03vorbis" {
            return None;
        }
        packet[7..].to_vec()
    };

    Some(PreSerializedFile {
        length,
        sample_rate,
        channels: channels as u32,
        bitrate,
        tags: Vec::new(),
        extra: Vec::new(),
        lazy_vc: Some(comment_payload),
    })
}
/// Converts an MP4 tag value into its owned batch representation.
///
/// Single-element text / integer / pair values become scalars; multi-element
/// integers and pairs are stringified into a `TextList` (pairs as `(a,b)`).
#[inline(always)]
fn mp4_value_to_batch(value: &mp4::MP4TagValue) -> BatchTagValue {
    match value {
        mp4::MP4TagValue::Bool(flag) => BatchTagValue::Bool(*flag),
        mp4::MP4TagValue::Data(raw) => BatchTagValue::Bytes(raw.clone()),
        mp4::MP4TagValue::Text(texts) => match texts.as_slice() {
            [only] => BatchTagValue::Text(only.clone()),
            _ => BatchTagValue::TextList(texts.clone()),
        },
        mp4::MP4TagValue::Integer(ints) => match ints.as_slice() {
            [only] => BatchTagValue::Int(*only as i64),
            many => {
                let mut rendered = Vec::with_capacity(many.len());
                for n in many {
                    rendered.push(itoa::Buffer::new().format(*n).to_string());
                }
                BatchTagValue::TextList(rendered)
            }
        },
        mp4::MP4TagValue::IntPair(pairs) => match pairs.as_slice() {
            [(a, b)] => BatchTagValue::IntPair(*a, *b),
            many => {
                let mut rendered = Vec::with_capacity(many.len());
                for (a, b) in many {
                    let mut text = String::with_capacity(12);
                    text.push('(');
                    text.push_str(itoa::Buffer::new().format(*a));
                    text.push(',');
                    text.push_str(itoa::Buffer::new().format(*b));
                    text.push(')');
                    rendered.push(text);
                }
                BatchTagValue::TextList(rendered)
            }
        },
        mp4::MP4TagValue::Cover(covers) => {
            let mut out = Vec::with_capacity(covers.len());
            for cover in covers.iter() {
                out.push((cover.data.clone(), cover.format as u8));
            }
            BatchTagValue::CoverList(out)
        }
        mp4::MP4TagValue::FreeForm(forms) => {
            let mut out = Vec::with_capacity(forms.len());
            for form in forms.iter() {
                out.push(form.data.clone());
            }
            BatchTagValue::FreeFormList(out)
        }
    }
}
/// Parses an MP3 buffer into a `PreSerializedFile`.
///
/// Only the first frame stored under each key is decoded; frames that fail to
/// decode are dropped. A `TYER` frame is reported under `TDRC` unless a `TDRC`
/// entry already exists (or was already synthesized), in which case it is skipped.
#[inline(always)]
fn parse_mp3_batch(data: &[u8], path: &str) -> Option<PreSerializedFile> {
    let mut f = mp3::MP3File::parse(data, path).ok()?;
    f.ensure_tags_parsed(data);

    let mut has_tdrc = f.tags.frames.iter().any(|(k, _)| k.as_str() == "TDRC");
    let mut tags = Vec::with_capacity(f.tags.frames.len());
    for (hash_key, frames) in f.tags.frames.iter_mut() {
        let Some(lf) = frames.first_mut() else { continue };
        let Ok(frame) = lf.decode_with_buf(&f.tags.raw_buf) else { continue };
        let reported_key = match hash_key.as_str() {
            "TYER" if has_tdrc => continue,
            "TYER" => {
                has_tdrc = true;
                "TDRC"
            }
            other => other,
        };
        tags.push((reported_key.to_string(), frame_to_batch_value(frame)));
    }

    let bitrate_mode_code = match f.info.bitrate_mode {
        mp3::xing::BitrateMode::Unknown => 0,
        mp3::xing::BitrateMode::CBR => 1,
        mp3::xing::BitrateMode::VBR => 2,
        mp3::xing::BitrateMode::ABR => 3,
    };
    let version_text = ryu::Buffer::new().format(f.info.version).to_string();
    let extra = vec![
        ("version", BatchTagValue::Text(version_text)),
        ("layer", BatchTagValue::Int(f.info.layer as i64)),
        ("mode", BatchTagValue::Int(f.info.mode as i64)),
        ("protected", BatchTagValue::Bool(f.info.protected)),
        ("bitrate_mode", BatchTagValue::Int(bitrate_mode_code)),
    ];

    Some(PreSerializedFile {
        length: f.info.length,
        sample_rate: f.info.sample_rate,
        channels: f.info.channels,
        bitrate: Some(f.info.bitrate),
        tags,
        extra,
        lazy_vc: None,
    })
}
#[inline(always)]
fn parse_mp4_batch(data: &[u8], path: &str) -> Option<PreSerializedFile> {
let mut f = mp4::MP4File::parse(data, path).ok()?;
f.ensure_parsed_with_data(data);
let mut tags = Vec::with_capacity(f.tags.items.len());
for (key, value) in f.tags.items.iter() {
tags.push((key.clone(), mp4_value_to_batch(value)));
}
let extra = vec![
("codec", BatchTagValue::Text(f.info.codec.clone())),
("bits_per_sample", BatchTagValue::Int(f.info.bits_per_sample as i64)),
];
Some(PreSerializedFile {
length: f.info.length,
sample_rate: f.info.sample_rate,
channels: f.info.channels as u32,
bitrate: if f.info.bitrate > 0 { Some(f.info.bitrate) } else { None },
tags,
extra,
lazy_vc: None,
})
}
/// Picks a format parser for `data` and runs it.
///
/// The file extension (case-insensitive) decides first. For unknown
/// extensions each format's `score` is consulted; ties are resolved in the
/// order FLAC, Ogg, MP4, MP3. Returns `None` if every score is 0 or parsing fails.
#[inline(always)]
fn parse_and_serialize(data: &[u8], path: &str) -> Option<PreSerializedFile> {
    let ext = path.rsplit('.').next().unwrap_or("");
    let ext_is_one_of = |names: &[&str]| names.iter().any(|name| ext.eq_ignore_ascii_case(name));

    if ext_is_one_of(&["flac"]) {
        return parse_flac_batch(data, data.len());
    }
    if ext_is_one_of(&["ogg"]) {
        return parse_ogg_batch(data);
    }
    if ext_is_one_of(&["mp3"]) {
        return parse_mp3_batch(data, path);
    }
    if ext_is_one_of(&["m4a", "m4b", "mp4", "m4v"]) {
        return parse_mp4_batch(data, path);
    }

    // Unknown extension: fall back to content sniffing.
    let mp3_score = mp3::MP3File::score(path, data);
    let flac_score = flac::FLACFile::score(path, data);
    let ogg_score = ogg::OggVorbisFile::score(path, data);
    let mp4_score = mp4::MP4File::score(path, data);
    let best = mp3_score.max(flac_score).max(ogg_score).max(mp4_score);

    match best {
        0 => None,
        s if s == flac_score => parse_flac_batch(data, data.len()),
        s if s == ogg_score => parse_ogg_batch(data),
        s if s == mp4_score => parse_mp4_batch(data, path),
        _ => parse_mp3_batch(data, path),
    }
}
/// Converts a batch tag value into a Python object through the safe PyO3 API.
///
/// Pictures and covers become dicts, popularimeters a `PyPOPM` instance,
/// lists become Python lists, byte payloads become `bytes`.
#[inline(always)]
fn batch_value_to_py(py: Python<'_>, bv: &BatchTagValue) -> PyResult<Py<PyAny>> {
    let object: Py<PyAny> = match bv {
        BatchTagValue::Text(text) => text.as_str().into_pyobject(py)?.into_any().unbind(),
        BatchTagValue::TextList(texts) => PyList::new(py, texts)?.into_any().unbind(),
        BatchTagValue::Bytes(raw) => PyBytes::new(py, raw).into_any().unbind(),
        BatchTagValue::Int(number) => number.into_pyobject(py)?.into_any().unbind(),
        BatchTagValue::IntPair(first, second) => {
            PyTuple::new(py, &[*first, *second])?.into_any().unbind()
        }
        BatchTagValue::Bool(flag) => (*flag).into_pyobject(py)?.to_owned().into_any().unbind(),
        BatchTagValue::Picture { mime, pic_type, desc, data } => {
            let picture = PyDict::new(py);
            picture.set_item(pyo3::intern!(py, "mime"), mime.as_str())?;
            picture.set_item(pyo3::intern!(py, "type"), *pic_type)?;
            picture.set_item(pyo3::intern!(py, "desc"), desc.as_str())?;
            picture.set_item(pyo3::intern!(py, "data"), PyBytes::new(py, data))?;
            picture.into_any().unbind()
        }
        BatchTagValue::Popularimeter { email, rating, count } => {
            let popm = PyPOPM {
                email: email.clone(),
                rating: *rating,
                count: *count,
            };
            Py::new(py, popm)?.into_any()
        }
        BatchTagValue::PairedText(pairs) => {
            let borrowed: Vec<(&str, &str)> = pairs
                .iter()
                .map(|(left, right)| (left.as_str(), right.as_str()))
                .collect();
            PyList::new(py, &borrowed)?.into_any().unbind()
        }
        BatchTagValue::CoverList(covers) => {
            let out = PyList::empty(py);
            for (data, format) in covers {
                let cover = PyDict::new(py);
                cover.set_item(pyo3::intern!(py, "data"), PyBytes::new(py, data))?;
                cover.set_item(pyo3::intern!(py, "format"), *format)?;
                out.append(cover)?;
            }
            out.into_any().unbind()
        }
        BatchTagValue::FreeFormList(forms) => {
            let out = PyList::empty(py);
            for payload in forms {
                out.append(PyBytes::new(py, payload))?;
            }
            out.into_any().unbind()
        }
    };
    Ok(object)
}
#[inline(always)]
unsafe fn batch_value_to_py_ffi(py: Python<'_>, bv: &BatchTagValue) -> *mut pyo3::ffi::PyObject {
match bv {
BatchTagValue::Text(s) => {
pyo3::ffi::PyUnicode_FromStringAndSize(
s.as_ptr() as *const std::ffi::c_char,
s.len() as pyo3::ffi::Py_ssize_t)
}
BatchTagValue::TextList(v) => {
let list = pyo3::ffi::PyList_New(v.len() as pyo3::ffi::Py_ssize_t);
if list.is_null() { return std::ptr::null_mut(); }
for (i, s) in v.iter().enumerate() {
let obj = pyo3::ffi::PyUnicode_FromStringAndSize(
s.as_ptr() as *const std::ffi::c_char,
s.len() as pyo3::ffi::Py_ssize_t);
pyo3::ffi::PyList_SET_ITEM(list, i as pyo3::ffi::Py_ssize_t, obj); }
list
}
BatchTagValue::Bytes(d) => {
pyo3::ffi::PyBytes_FromStringAndSize(
d.as_ptr() as *const std::ffi::c_char,
d.len() as pyo3::ffi::Py_ssize_t)
}
BatchTagValue::Int(i) => pyo3::ffi::PyLong_FromLongLong(*i),
BatchTagValue::IntPair(a, b) => {
match PyTuple::new(py, &[*a, *b]) {
Ok(t) => { let ptr = t.as_ptr(); pyo3::ffi::Py_INCREF(ptr); ptr }
Err(_) => std::ptr::null_mut()
}
}
BatchTagValue::Bool(v) => {
if *v { pyo3::ffi::Py_INCREF(pyo3::ffi::Py_True()); pyo3::ffi::Py_True() }
else { pyo3::ffi::Py_INCREF(pyo3::ffi::Py_False()); pyo3::ffi::Py_False() }
}
_ => {
match batch_value_to_py(py, bv) {
Ok(obj) => { let ptr = obj.as_ptr(); pyo3::ffi::Py_INCREF(ptr); ptr }
Err(_) => std::ptr::null_mut()
}
}
}
}
#[inline(always)]
fn preserialized_to_py_dict(py: Python<'_>, pf: &PreSerializedFile) -> PyResult<Py<PyAny>> {
unsafe {
let inner = pyo3::ffi::PyDict_New();
if inner.is_null() {
return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
}
set_dict_f64(inner, pyo3::intern!(py, "length").as_ptr(), pf.length);
set_dict_u32(inner, pyo3::intern!(py, "sample_rate").as_ptr(), pf.sample_rate);
set_dict_u32(inner, pyo3::intern!(py, "channels").as_ptr(), pf.channels);
if let Some(br) = pf.bitrate {
set_dict_u32(inner, pyo3::intern!(py, "bitrate").as_ptr(), br);
}
if pf.tags.is_empty() {
if let Some(ref vc_bytes) = pf.lazy_vc {
let tags_dict = pyo3::ffi::PyDict_New();
if !tags_dict.is_null() {
parse_vc_to_ffi_dict(vc_bytes, tags_dict);
pyo3::ffi::PyDict_SetItem(inner, pyo3::intern!(py, "tags").as_ptr(), tags_dict);
pyo3::ffi::Py_DECREF(tags_dict);
}
} else {
let tags_dict = pyo3::ffi::PyDict_New();
if !tags_dict.is_null() {
pyo3::ffi::PyDict_SetItem(inner, pyo3::intern!(py, "tags").as_ptr(), tags_dict);
pyo3::ffi::Py_DECREF(tags_dict);
}
}
} else {
let tags_dict = pyo3::ffi::PyDict_New();
if !tags_dict.is_null() {
for (key, value) in &pf.tags {
let key_ptr = intern_tag_key(key.as_bytes());
if key_ptr.is_null() { continue; }
let val_ptr = batch_value_to_py_ffi(py, value);
if !val_ptr.is_null() {
pyo3::ffi::PyDict_SetItem(tags_dict, key_ptr, val_ptr);
pyo3::ffi::Py_DECREF(val_ptr);
}
pyo3::ffi::Py_DECREF(key_ptr);
}
pyo3::ffi::PyDict_SetItem(inner, pyo3::intern!(py, "tags").as_ptr(), tags_dict);
pyo3::ffi::Py_DECREF(tags_dict);
}
}
Ok(Bound::from_owned_ptr(py, inner).unbind())
}
}
/// Decodes a raw Vorbis comment block straight into a Python dict.
///
/// Each key is ASCII-lowercased (truncated to 128 bytes) and mapped to a list
/// of string values; a repeated key appends to its list. If the dict already
/// holds a non-list value under a key, that value and the new one are wrapped
/// in a fresh two-element list. At most 256 comments are read.
///
/// Best-effort: malformed entries, undecodable values and allocation failures
/// are skipped, and the Python error they raise is cleared so that no stale
/// exception is left pending.
///
/// # Safety
/// The caller must hold the GIL and `tags_dict` must be a valid, non-null
/// pointer to a Python dict.
#[inline(always)]
unsafe fn parse_vc_to_ffi_dict(data: &[u8], tags_dict: *mut pyo3::ffi::PyObject) {
    if data.len() < 8 {
        return;
    }
    let mut pos = 0usize;
    let vendor_len = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
    pos += 4;
    // Compare against the remaining length so the check cannot wrap on 32-bit targets.
    if vendor_len > data.len() - pos {
        return;
    }
    pos += vendor_len;
    if data.len() - pos < 4 {
        return;
    }
    let count = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
    pos += 4;

    for _ in 0..count.min(256) {
        if data.len() - pos < 4 {
            break;
        }
        let clen = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
        pos += 4;
        if clen > data.len() - pos {
            break;
        }
        let raw = &data[pos..pos + clen];
        pos += clen;

        let eq_pos = match memchr::memchr(b'=', raw) {
            Some(p) => p,
            None => continue,
        };
        let key_bytes = &raw[..eq_pos];
        let value_bytes = &raw[eq_pos + 1..];

        let mut buf = [0u8; 128];
        let key_len = key_bytes.len().min(128);
        for (dst, src) in buf[..key_len].iter_mut().zip(key_bytes) {
            *dst = src.to_ascii_lowercase();
        }
        let key_ptr = intern_tag_key(&buf[..key_len]);
        if key_ptr.is_null() {
            pyo3::ffi::PyErr_Clear();
            continue;
        }
        let val_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
            value_bytes.as_ptr() as *const std::ffi::c_char,
            value_bytes.len() as pyo3::ffi::Py_ssize_t,
        );
        if val_ptr.is_null() {
            pyo3::ffi::PyErr_Clear();
            pyo3::ffi::Py_DECREF(key_ptr);
            continue;
        }

        // Borrowed reference (or null when the key is absent).
        let existing = pyo3::ffi::PyDict_GetItem(tags_dict, key_ptr);
        if !existing.is_null() && pyo3::ffi::PyList_Check(existing) != 0 {
            if pyo3::ffi::PyList_Append(existing, val_ptr) != 0 {
                pyo3::ffi::PyErr_Clear();
            }
            pyo3::ffi::Py_DECREF(val_ptr);
        } else {
            let list = pyo3::ffi::PyList_New(if existing.is_null() { 1 } else { 2 });
            if list.is_null() {
                // Allocation failed: drop this value instead of writing through NULL.
                pyo3::ffi::PyErr_Clear();
                pyo3::ffi::Py_DECREF(val_ptr);
            } else {
                if existing.is_null() {
                    pyo3::ffi::PyList_SET_ITEM(list, 0, val_ptr);
                } else {
                    // The list needs its own reference; the dict drops its one on replace.
                    pyo3::ffi::Py_INCREF(existing);
                    pyo3::ffi::PyList_SET_ITEM(list, 0, existing);
                    pyo3::ffi::PyList_SET_ITEM(list, 1, val_ptr);
                }
                if pyo3::ffi::PyDict_SetItem(tags_dict, key_ptr, list) != 0 {
                    pyo3::ffi::PyErr_Clear();
                }
                pyo3::ffi::Py_DECREF(list);
            }
        }
        pyo3::ffi::Py_DECREF(key_ptr);
    }
}
/// Appends `s` to `out` as a double-quoted JSON string literal.
///
/// Escapes `"`, `\`, newline, carriage return and tab with their short forms
/// and any other character below U+0020 as `\u00XX`. Strings needing no
/// escaping are copied in one go.
#[allow(dead_code)]
#[inline(always)]
fn json_escape_to(s: &str, out: &mut String) {
    out.push('"');
    let is_clean = s.bytes().all(|b| b >= 0x20 && b != b'"' && b != b'\\');
    if is_clean {
        out.push_str(s);
    } else {
        for ch in s.chars() {
            let short_form = match ch {
                '"' => Some("\\\""),
                '\\' => Some("\\\\"),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match short_form {
                Some(escaped) => out.push_str(escaped),
                None if u32::from(ch) < 0x20 => {
                    out.push_str(&format!("\\u{:04x}", u32::from(ch)));
                }
                None => out.push(ch),
            }
        }
    }
    out.push('"');
}
/// Appends the JSON encoding of a batch tag value to `out`.
///
/// Strings are escaped with `json_escape_to`, pairs and lists become arrays,
/// and every binary or structured variant is written as `null`.
#[allow(dead_code)]
#[inline(always)]
fn batch_value_to_json(bv: &BatchTagValue, out: &mut String) {
    match bv {
        BatchTagValue::Bytes(_)
        | BatchTagValue::Picture { .. }
        | BatchTagValue::Popularimeter { .. }
        | BatchTagValue::CoverList(_)
        | BatchTagValue::FreeFormList(_) => out.push_str("null"),
        BatchTagValue::Bool(flag) => out.push_str(if *flag { "true" } else { "false" }),
        BatchTagValue::Int(number) => write_int(out, *number),
        BatchTagValue::Text(text) => json_escape_to(text, out),
        BatchTagValue::IntPair(first, second) => {
            out.push('[');
            write_int(out, *first);
            out.push(',');
            write_int(out, *second);
            out.push(']');
        }
        BatchTagValue::TextList(texts) => {
            out.push('[');
            let mut remaining = texts.iter();
            if let Some(head) = remaining.next() {
                json_escape_to(head, out);
                for text in remaining {
                    out.push(',');
                    json_escape_to(text, out);
                }
            }
            out.push(']');
        }
        BatchTagValue::PairedText(pairs) => {
            out.push('[');
            let mut separator_needed = false;
            for (left, right) in pairs {
                if separator_needed {
                    out.push(',');
                }
                separator_needed = true;
                out.push('[');
                json_escape_to(left, out);
                out.push(',');
                json_escape_to(right, out);
                out.push(']');
            }
            out.push(']');
        }
    }
}
/// Appends the decimal representation of `v` to `out` using `itoa`.
#[allow(dead_code)]
#[inline(always)]
fn write_int(out: &mut String, v: impl itoa::Integer) {
    let mut digits = itoa::Buffer::new();
    let text: &str = digits.format(v);
    out.push_str(text);
}
/// Appends the `ryu` text form of `v` to `out`.
#[allow(dead_code)]
#[inline(always)]
fn write_float(out: &mut String, v: f64) {
    let mut digits = ryu::Buffer::new();
    let text: &str = digits.format(v);
    out.push_str(text);
}
/// Appends a JSON object describing `pf` to `out`.
///
/// Emits `length`, `sample_rate`, `channels`, optionally `bitrate`, and a
/// `tags` object. When `pf.tags` is empty and raw Vorbis comment bytes are
/// present, the tags are decoded from those bytes. Binary and structured tag
/// values are left out of the `tags` object entirely.
#[allow(dead_code)]
#[inline(always)]
fn preserialized_to_json(pf: &PreSerializedFile, out: &mut String) {
    out.push_str("{\"length\":");
    write_float(out, pf.length);
    out.push_str(",\"sample_rate\":");
    write_int(out, pf.sample_rate);
    out.push_str(",\"channels\":");
    write_int(out, pf.channels);
    if let Some(rate) = pf.bitrate {
        out.push_str(",\"bitrate\":");
        write_int(out, rate);
    }

    // Decode the lazy Vorbis comments only when no eager tags exist.
    let decoded = match &pf.lazy_vc {
        Some(vc_bytes) if pf.tags.is_empty() => Some(parse_vc_to_batch_tags(vc_bytes)),
        _ => None,
    };
    let tags = decoded.as_ref().unwrap_or(&pf.tags);

    out.push_str(",\"tags\":{");
    let mut wrote_any = false;
    for (key, value) in tags {
        let not_json_representable = matches!(
            value,
            BatchTagValue::Bytes(_)
                | BatchTagValue::Picture { .. }
                | BatchTagValue::Popularimeter { .. }
                | BatchTagValue::CoverList(_)
                | BatchTagValue::FreeFormList(_)
        );
        if not_json_representable {
            continue;
        }
        if wrote_any {
            out.push(',');
        }
        wrote_any = true;
        json_escape_to(key, out);
        out.push(':');
        batch_value_to_json(value, out);
    }
    out.push_str("}}");
}
// Mapping-like container of per-file result dicts, exposed to Python as
// `BatchResult`. (Plain `//` comments are used on purpose: `///` would become
// the Python-visible `__doc__`.)
#[pyclass(name = "BatchResult")]
struct PyBatchResult {
// File paths; `keys()` and `items()` iterate in this order.
paths: Vec<String>,
// Result dicts, indexed in parallel with `paths` (see `items`).
dicts: Vec<Py<PyAny>>,
// Path -> position in `dicts`, used by `__contains__` / `__getitem__`.
index: HashMap<String, usize>,
}
// Python-facing mapping protocol for `BatchResult`. (`//` comments keep the
// Python-visible method docstrings unchanged.)
#[pymethods]
impl PyBatchResult {
    // Number of stored paths.
    fn __len__(&self) -> usize {
        self.paths.len()
    }

    // Copy of all paths, in stored order.
    fn keys(&self) -> Vec<String> {
        self.paths.to_vec()
    }

    // Whether `path` has a result.
    fn __contains__(&self, path: &str) -> bool {
        self.index.get(path).is_some()
    }

    // Returns the stored dict for `path` (a new reference to the same object,
    // not a copy) or raises `KeyError`.
    fn __getitem__(&self, py: Python<'_>, path: &str) -> PyResult<Py<PyAny>> {
        match self.index.get(path) {
            Some(&slot) => Ok(self.dicts[slot].clone_ref(py)),
            None => Err(PyKeyError::new_err(path.to_string())),
        }
    }

    // Returns a list of `(path, dict)` tuples where each dict is a shallow
    // copy of the stored one; entries whose copy fails are skipped.
    fn items(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
        let pairs = PyList::empty(py);
        for (slot, path) in self.paths.iter().enumerate() {
            unsafe {
                let copied = pyo3::ffi::PyDict_Copy(self.dicts[slot].as_ptr());
                if copied.is_null() {
                    continue;
                }
                let dict_obj = Bound::from_owned_ptr(py, copied);
                let key_obj = path.as_str().into_pyobject(py)?.into_any();
                let pair = PyTuple::new(py, &[key_obj, dict_obj.into_any()])?;
                pairs.append(pair)?;
            }
        }
        Ok(pairs.into_any().unbind())
    }
}
#[cfg(unix)]
fn batch_open_io(filenames: &[String], exts: &[&str]) -> Vec<(usize, Arc<PreSerializedFile>)> {
use rayon::prelude::*;
let n = filenames.len();
if n == 0 { return Vec::new(); }
let common_dir = if !filenames.is_empty() {
let first_dir = filenames[0].rsplit_once('/').map(|(d, _)| d);
if let Some(dir) = first_dir {
if filenames.iter().all(|p| p.rsplit_once('/').map(|(d, _)| d) == Some(dir)) {
Some(dir.to_string())
} else { None }
} else { None }
} else { None };
let (c_names, dir_fd): (Vec<std::ffi::CString>, i32) = if let Some(ref dir) = common_dir {
let names: Vec<std::ffi::CString> = filenames.iter()
.map(|p| {
let rel = p.rsplit_once('/').map(|(_, f)| f).unwrap_or(p);
std::ffi::CString::new(rel).unwrap_or_default()
})
.collect();
let c_dir = std::ffi::CString::new(dir.as_str()).unwrap_or_default();
let dfd = unsafe { libc::open(c_dir.as_ptr(), libc::O_RDONLY | libc::O_DIRECTORY) };
(names, dfd)
} else {
let paths: Vec<std::ffi::CString> = filenames.iter()
.map(|p| std::ffi::CString::new(p.as_str()).unwrap_or_default())
.collect();
(paths, -1)
};
#[cfg(target_os = "linux")]
let noatime_flag: libc::c_int = libc::O_NOATIME;
#[cfg(not(target_os = "linux"))]
let noatime_flag: libc::c_int = 0;
let open_file = |i: usize| -> i32 {
unsafe {
let flags = libc::O_RDONLY | noatime_flag;
let f = if dir_fd >= 0 {
libc::openat(dir_fd, c_names[i].as_ptr(), flags)
} else {
libc::open(c_names[i].as_ptr(), flags)
};
if f >= 0 { f } else if dir_fd >= 0 {
libc::openat(dir_fd, c_names[i].as_ptr(), libc::O_RDONLY)
} else {
libc::open(c_names[i].as_ptr(), libc::O_RDONLY)
}
}
};
let sizes: Vec<i64> = (0..n).into_par_iter()
.map(|i| {
let mut stat_buf: libc::stat = unsafe { std::mem::zeroed() };
let rc = unsafe {
if dir_fd >= 0 {
libc::fstatat(dir_fd, c_names[i].as_ptr(), &mut stat_buf, 0)
} else {
libc::stat(c_names[i].as_ptr(), &mut stat_buf)
}
};
if rc == 0 { stat_buf.st_size as i64 } else { -1 }
})
.collect();
let mut sorted_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
for i in 0..n {
if sizes[i] >= 0 {
let ext_id: u64 = match exts[i].as_bytes() {
b"mp3" | b"MP3" => 1,
b"flac" | b"FLAC" => 2,
b"ogg" | b"OGG" => 3,
b"mp4" | b"MP4" | b"m4a" | b"M4A" | b"m4b" | b"M4B" => 4,
_ => 0,
};
sorted_indices.push(((sizes[i] as u64) << 4 | ext_id, i));
}
}
sorted_indices.sort_unstable_by_key(|&(k, _)| k);
let mut reps: Vec<usize> = Vec::new();
let mut group_bounds: Vec<usize> = Vec::new();
{
let mut i = 0;
while i < sorted_indices.len() {
let key = sorted_indices[i].0;
reps.push(sorted_indices[i].1);
group_bounds.push(i);
while i < sorted_indices.len() && sorted_indices[i].0 == key { i += 1; }
}
group_bounds.push(sorted_indices.len());
}
let parsed: HashMap<usize, Arc<PreSerializedFile>> = reps.par_iter().copied()
.filter_map(|i| {
let fd = open_file(i);
if fd < 0 { return None; }
let file_len = sizes[i] as usize;
let ext = exts[i];
let pf = if ext.eq_ignore_ascii_case("flac") && file_len > 4096 {
let mut buf = vec![0u8; 4096];
let nr = unsafe {
libc::pread(fd, buf.as_mut_ptr() as *mut libc::c_void, 4096, 0)
};
if nr <= 0 { unsafe { libc::close(fd); } return None; }
buf.truncate(nr as usize);
if let Some(pf) = parse_flac_batch(&buf, file_len) {
if pf.lazy_vc.is_some() {
unsafe { libc::close(fd); }
Some(pf)
} else {
let mut data = vec![0u8; file_len];
let nr2 = unsafe {
libc::pread(fd, data.as_mut_ptr() as *mut libc::c_void, file_len, 0)
};
unsafe { libc::close(fd); }
if nr2 <= 0 { return None; }
data.truncate(nr2 as usize);
parse_flac_batch(&data, file_len)
}
} else {
let mut data = vec![0u8; file_len];
let nr2 = unsafe {
libc::pread(fd, data.as_mut_ptr() as *mut libc::c_void, file_len, 0)
};
unsafe { libc::close(fd); }
if nr2 <= 0 { return None; }
data.truncate(nr2 as usize);
parse_flac_batch(&data, file_len)
}
} else {
let mut data = vec![0u8; file_len];
let nr = unsafe {
libc::pread(fd, data.as_mut_ptr() as *mut libc::c_void, file_len, 0)
};
unsafe { libc::close(fd); }
if nr <= 0 { return None; }
data.truncate(nr as usize);
parse_and_serialize(&data, &filenames[i])
}?;
Some((i, Arc::new(pf)))
})
.collect();
if dir_fd >= 0 { unsafe { libc::close(dir_fd); } }
let mut results: Vec<(usize, Arc<PreSerializedFile>)> = Vec::with_capacity(n);
for (g, &rep) in reps.iter().enumerate() {
if let Some(pf) = parsed.get(&rep) {
for j in group_bounds[g]..group_bounds[g + 1] {
results.push((sorted_indices[j].1, Arc::clone(pf)));
}
}
}
results
}
/// Reads and parses a batch of files in parallel (non-Unix implementation).
///
/// Returns `(index into filenames, parsed file)` pairs; files whose metadata
/// cannot be read, or that fail to open, read or parse, are omitted.
/// `exts[i]` is indexed in parallel with `filenames[i]`.
///
/// NOTE(review): files are grouped by `(size, extension id)` and only one
/// representative per group is parsed; every file in the group receives that
/// result. Distinct files with equal size and extension therefore get
/// identical metadata — confirm this is intended.
#[cfg(not(unix))]
fn batch_open_io(filenames: &[String], exts: &[&str]) -> Vec<(usize, Arc<PreSerializedFile>)> {
use rayon::prelude::*;
use std::io::Read;
let n = filenames.len();
if n == 0 { return Vec::new(); }
// File sizes; -1 marks files whose metadata could not be read.
let sizes: Vec<i64> = (0..n).into_par_iter()
.map(|i| std::fs::metadata(&filenames[i]).map(|m| m.len() as i64).unwrap_or(-1))
.collect();
// Sort key: file size in the high bits, extension id in the low 4 bits.
let mut sorted_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
for i in 0..n {
if sizes[i] >= 0 {
let ext_id: u64 = match exts[i].as_bytes() {
b"mp3" | b"MP3" => 1,
b"flac" | b"FLAC" => 2,
b"ogg" | b"OGG" => 3,
b"mp4" | b"MP4" | b"m4a" | b"M4A" | b"m4b" | b"M4B" => 4,
_ => 0,
};
sorted_indices.push(((sizes[i] as u64) << 4 | ext_id, i));
}
}
sorted_indices.sort_unstable_by_key(|&(k, _)| k);
// One representative per run of equal keys; `group_bounds[g]..group_bounds[g + 1]`
// is the range of `sorted_indices` belonging to `reps[g]`.
let mut reps: Vec<usize> = Vec::new();
let mut group_bounds: Vec<usize> = Vec::new();
{
let mut i = 0;
while i < sorted_indices.len() {
let key = sorted_indices[i].0;
reps.push(sorted_indices[i].1);
group_bounds.push(i);
while i < sorted_indices.len() && sorted_indices[i].0 == key { i += 1; }
}
group_bounds.push(sorted_indices.len());
}
// Parse only the representatives, in parallel.
let parsed: HashMap<usize, Arc<PreSerializedFile>> = reps.par_iter().copied()
.filter_map(|i| {
let file_len = sizes[i] as usize;
let ext = exts[i];
// Large FLAC files: try a 4096-byte prefix first and read the whole file
// only when the prefix did not yield a Vorbis comment block.
let pf = if ext.eq_ignore_ascii_case("flac") && file_len > 4096 {
let mut file = std::fs::File::open(&filenames[i]).ok()?;
let mut buf = vec![0u8; 4096];
file.read_exact(&mut buf).ok()?;
if let Some(pf) = parse_flac_batch(&buf, file_len) {
if pf.lazy_vc.is_some() {
Some(pf)
} else {
let data = std::fs::read(&filenames[i]).ok()?;
parse_flac_batch(&data, file_len)
}
} else {
let data = std::fs::read(&filenames[i]).ok()?;
parse_flac_batch(&data, file_len)
}
} else {
let data = std::fs::read(&filenames[i]).ok()?;
parse_and_serialize(&data, &filenames[i])
}?;
Some((i, Arc::new(pf)))
})
.collect();
// Fan each representative's result out to every file in its group.
let mut results: Vec<(usize, Arc<PreSerializedFile>)> = Vec::with_capacity(n);
for (g, &rep) in reps.iter().enumerate() {
if let Some(pf) = parsed.get(&rep) {
for j in group_bounds[g]..group_bounds[g + 1] {
results.push((sorted_indices[j].1, Arc::clone(pf)));
}
}
}
results
}
#[pyfunction]
fn batch_open(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
let exts: Vec<&str> = filenames.iter()
.map(|p| p.rsplit('.').next().unwrap_or(""))
.collect();
let file_indices: Vec<(usize, Arc<PreSerializedFile>)> =
py.detach(|| batch_open_io(&filenames, &exts));
unsafe {
let result_ptr = pyo3::ffi::PyDict_New();
if result_ptr.is_null() {
return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
}
let mut mat_cache: HashMap<usize, *mut pyo3::ffi::PyObject> = HashMap::new();
for &(idx, ref pf) in &file_indices {
let cache_key = Arc::as_ptr(pf) as usize;
let dict_ptr = if let Some(&cached) = mat_cache.get(&cache_key) {
cached
} else {
let d = preserialized_to_py_dict(py, pf)?.into_ptr();
mat_cache.insert(cache_key, d);
d
};
let path = &filenames[idx];
let path_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
path.as_ptr() as *const std::ffi::c_char, path.len() as pyo3::ffi::Py_ssize_t);
pyo3::ffi::PyDict_SetItem(result_ptr, path_ptr, dict_ptr);
pyo3::ffi::Py_DECREF(path_ptr);
}
for (_, ptr) in &mat_cache {
pyo3::ffi::Py_DECREF(*ptr);
}
Ok(Bound::from_owned_ptr(py, result_ptr).unbind())
}
}
// Reads many files in parallel and returns a dict mapping each successfully
// parsed path to a flat dict holding stream info, the `extra` attributes and
// the tags (all at the same level).
//
// NOTE(review): results are de-duplicated on the first 64 bytes of each file:
// a file whose 64-byte header buffer equals that of an already parsed file
// reuses that file's result. Distinct files sharing those bytes therefore get
// identical metadata — confirm this is intended.
// NOTE(review): the `?` on `batch_value_to_py` below returns early without
// releasing `dp`, `result_ptr` or the cached templates.
// (`//` comments are used so the Python-visible docstring is unchanged.)
#[pyfunction]
fn _fast_batch_read(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
use rayon::prelude::*;
use std::sync::Arc;
// Phase 1 (GIL released): read and parse files into plain Rust data.
let parsed: Vec<(String, Arc<PreSerializedFile>)> = py.detach(|| {
let n = filenames.len();
if n == 0 { return Vec::new(); }
// Header bytes -> parsed result, shared between worker threads.
let dedup: std::sync::RwLock<HashMap<[u8; 64], Arc<PreSerializedFile>>> =
std::sync::RwLock::new(HashMap::with_capacity(n / 4));
(0..n).into_par_iter()
.with_min_len(4)
.filter_map(|i| {
use std::io::{Read, Seek};
let path = &filenames[i];
let mut file = std::fs::File::open(path).ok()?;
// Zero-initialised; a short read leaves the tail as zeros.
let mut header = [0u8; 64];
let hdr_n = file.read(&mut header).ok()?;
if hdr_n == 0 { return None; }
{
if let Ok(cache) = dedup.read() {
if let Some(pf) = cache.get(&header) {
return Some((path.clone(), Arc::clone(pf)));
}
}
}
let file_len = file.metadata().ok()?.len() as usize;
file.seek(std::io::SeekFrom::Start(0)).ok()?;
// Files above 32 KiB are memory-mapped; smaller ones are read into a Vec.
let pf = if file_len > 32768 {
let mmap = unsafe { memmap2::Mmap::map(&file).ok()? };
parse_and_serialize(&mmap, path)
} else {
let mut data = Vec::with_capacity(file_len);
file.read_to_end(&mut data).ok()?;
parse_and_serialize(&data, path)
}?;
let arc = Arc::new(pf);
if let Ok(mut cache) = dedup.write() {
cache.insert(header, Arc::clone(&arc));
}
Some((path.clone(), arc))
})
.collect()
});
// Phase 2 (GIL held): build the Python dicts through the raw C API.
unsafe {
let result_ptr = pyo3::ffi::PyDict_New();
if result_ptr.is_null() {
return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
}
// One fully built "template" dict per distinct parse result; later paths
// sharing that result receive a shallow copy of it.
let mut template_cache: HashMap<*const PreSerializedFile, *mut pyo3::ffi::PyObject> =
HashMap::with_capacity(parsed.len() / 4 + 1);
for (path, pf) in &parsed {
let arc_ptr = Arc::as_ptr(pf);
let dict_ptr = if let Some(&template) = template_cache.get(&arc_ptr) {
pyo3::ffi::PyDict_Copy(template)
} else {
let dp = pyo3::ffi::PyDict_New();
if dp.is_null() { continue; }
set_dict_f64(dp, pyo3::intern!(py, "length").as_ptr(), pf.length);
set_dict_u32(dp, pyo3::intern!(py, "sample_rate").as_ptr(), pf.sample_rate);
set_dict_u32(dp, pyo3::intern!(py, "channels").as_ptr(), pf.channels);
if let Some(br) = pf.bitrate {
set_dict_u32(dp, pyo3::intern!(py, "bitrate").as_ptr(), br);
}
for (key, value) in &pf.extra {
let py_val = batch_value_to_py(py, value)?;
let key_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
key.as_ptr() as *const std::ffi::c_char, key.len() as pyo3::ffi::Py_ssize_t);
pyo3::ffi::PyDict_SetItem(dp, key_ptr, py_val.as_ptr());
pyo3::ffi::Py_DECREF(key_ptr);
}
// Tags are written into the same dict as the stream info.
if pf.tags.is_empty() {
if let Some(ref vc_bytes) = pf.lazy_vc {
parse_vc_to_ffi_dict(vc_bytes, dp);
}
} else {
for (key, value) in &pf.tags {
let py_val = batch_value_to_py_ffi(py, value);
if py_val.is_null() { continue; }
let key_ptr = intern_tag_key(key.as_bytes());
if key_ptr.is_null() {
pyo3::ffi::Py_DECREF(py_val);
continue;
}
pyo3::ffi::PyDict_SetItem(dp, key_ptr, py_val);
pyo3::ffi::Py_DECREF(py_val);
pyo3::ffi::Py_DECREF(key_ptr);
}
}
// Extra reference owned by `template_cache`; released after the loop.
pyo3::ffi::Py_INCREF(dp);
template_cache.insert(arc_ptr, dp);
dp
};
if dict_ptr.is_null() { continue; }
let path_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
path.as_ptr() as *const std::ffi::c_char, path.len() as pyo3::ffi::Py_ssize_t);
pyo3::ffi::PyDict_SetItem(result_ptr, path_ptr, dict_ptr);
pyo3::ffi::Py_DECREF(path_ptr);
// Drop our own reference; `result_ptr` now holds the dict.
pyo3::ffi::Py_DECREF(dict_ptr);
}
for (_, ptr) in &template_cache {
pyo3::ffi::Py_DECREF(*ptr);
}
Ok(Bound::from_owned_ptr(py, result_ptr).unbind())
}
}
// Diagnostic helper: times four ways of processing `filenames` and returns a
// one-line textual report.
//
// Measured phases, in order:
//   1. sequential whole-file reads,
//   2. sequential parsing of the data read in phase 1,
//   3. rayon-parallel parsing of the same data,
//   4. rayon-parallel read + parse straight from the paths.
// Files that fail to read or parse are skipped in every phase.
#[pyfunction]
fn batch_diag(py: Python<'_>, filenames: Vec<String>) -> PyResult<String> {
    use rayon::prelude::*;
    use std::time::Instant;

    let report = py.detach(|| {
        let total = filenames.len();

        // Phase 1: sequential reads.
        let read_clock = Instant::now();
        let loaded: Vec<(String, Vec<u8>)> = filenames
            .iter()
            .filter_map(|name| {
                let bytes = std::fs::read(name).ok()?;
                Some((name.clone(), bytes))
            })
            .collect();
        let read_seq_us = read_clock.elapsed().as_micros();

        // Phase 2: sequential parse of the loaded buffers.
        let seq_clock = Instant::now();
        drop(
            loaded
                .iter()
                .filter_map(|(name, bytes)| {
                    let pf = parse_and_serialize(bytes, name)?;
                    Some((name.clone(), pf))
                })
                .collect::<Vec<_>>(),
        );
        let parse_seq_us = seq_clock.elapsed().as_micros();

        // Phase 3: parallel parse of the loaded buffers.
        let par_clock = Instant::now();
        drop(
            loaded
                .par_iter()
                .filter_map(|(name, bytes)| {
                    let pf = parse_and_serialize(bytes, name)?;
                    Some((name.clone(), pf))
                })
                .collect::<Vec<_>>(),
        );
        let parse_par_us = par_clock.elapsed().as_micros();

        // Phase 4: parallel read + parse.
        let full_clock = Instant::now();
        drop(
            filenames
                .par_iter()
                .filter_map(|name| {
                    let bytes = std::fs::read(name).ok()?;
                    let pf = parse_and_serialize(&bytes, name)?;
                    Some((name.clone(), pf))
                })
                .collect::<Vec<_>>(),
        );
        let full_par_us = full_clock.elapsed().as_micros();

        // Denominators are clamped to 1 to avoid dividing by zero on tiny inputs.
        let speedup = parse_seq_us as f64 / parse_par_us.max(1) as f64;
        let io_percent =
            read_seq_us as f64 / (read_seq_us + parse_seq_us).max(1) as f64 * 100.0;

        format!(
            "n={} | seq_read={}µs seq_parse={}µs par_parse={}µs full_par={}µs | \
             parse_par_speedup={:.1}x io_fraction={:.0}%",
            total, read_seq_us, parse_seq_us, parse_par_us, full_par_us,
            speedup,
            io_percent,
        )
    });
    Ok(report)
}
// Opens `filename` and returns the matching Python wrapper object.
//
// The format is chosen from the text after the last '.' in `filename`
// (case-insensitive); when that is not a known extension, every format's
// `score` function is consulted and the highest score wins (ties resolved in the
// order FLAC, Ogg, MP4, MP3). Raises IOError when the file cannot be read and
// ValueError when every score is zero. `easy` is accepted but currently unused.
#[pyfunction]
#[pyo3(signature = (filename, easy=false))]
fn file_open(py: Python<'_>, filename: &str, easy: bool) -> PyResult<Py<PyAny>> {
    // Container formats this opener can dispatch to.
    #[derive(Clone, Copy)]
    enum Container {
        Flac,
        Ogg,
        Mp3,
        Mp4,
    }

    let _ = easy;
    let data = match read_cached(filename) {
        Ok(bytes) => bytes,
        Err(e) => return Err(PyIOError::new_err(format!("Cannot open file: {}", e))),
    };

    let ext = filename.rsplit('.').next().unwrap_or("");
    let ext_is_one_of =
        |names: &[&str]| names.iter().any(|name| ext.eq_ignore_ascii_case(name));

    let container = if ext_is_one_of(&["flac"]) {
        Container::Flac
    } else if ext_is_one_of(&["ogg"]) {
        Container::Ogg
    } else if ext_is_one_of(&["mp3"]) {
        Container::Mp3
    } else if ext_is_one_of(&["m4a", "m4b", "mp4", "m4v"]) {
        Container::Mp4
    } else {
        // Unknown extension: let each format score the content.
        let mp3_score = mp3::MP3File::score(filename, &data);
        let flac_score = flac::FLACFile::score(filename, &data);
        let ogg_score = ogg::OggVorbisFile::score(filename, &data);
        let mp4_score = mp4::MP4File::score(filename, &data);
        let max_score = mp3_score.max(flac_score).max(ogg_score).max(mp4_score);
        if max_score == 0 {
            return Err(PyValueError::new_err(format!(
                "Unable to detect format for: {}",
                filename
            )));
        }
        if max_score == flac_score {
            Container::Flac
        } else if max_score == ogg_score {
            Container::Ogg
        } else if max_score == mp4_score {
            Container::Mp4
        } else {
            Container::Mp3
        }
    };

    match container {
        Container::Flac => {
            let f = PyFLAC::from_data(py, &data, filename)?;
            Ok(f.into_pyobject(py)?.into_any().unbind())
        }
        Container::Ogg => {
            let f = PyOggVorbis::from_data(py, &data, filename)?;
            Ok(f.into_pyobject(py)?.into_any().unbind())
        }
        Container::Mp3 => {
            let f = PyMP3::from_data(py, &data, filename)?;
            Ok(f.into_pyobject(py)?.into_any().unbind())
        }
        Container::Mp4 => {
            let f = PyMP4::from_data(py, &data, filename)?;
            Ok(f.into_pyobject(py)?.into_any().unbind())
        }
    }
}
/// Process-wide map from a string key to a Python dict, created on first use.
static RESULT_CACHE: OnceLock<RwLock<HashMap<String, Py<PyDict>>>> = OnceLock::new();

/// Returns the shared result cache, initialising it with room for 256 entries
/// the first time it is requested.
fn get_result_cache() -> &'static RwLock<HashMap<String, Py<PyDict>>> {
    let make_empty = || -> RwLock<HashMap<String, Py<PyDict>>> {
        RwLock::new(HashMap::with_capacity(256))
    };
    RESULT_CACHE.get_or_init(make_empty)
}
/// Process-wide map from a string key to a Python template dict, created on first use.
static TEMPLATE_CACHE: OnceLock<RwLock<HashMap<String, Py<PyDict>>>> = OnceLock::new();

/// Returns the shared template cache, initialising it with room for 256 entries
/// the first time it is requested.
fn get_template_cache() -> &'static RwLock<HashMap<String, Py<PyDict>>> {
    let make_empty = || -> RwLock<HashMap<String, Py<PyDict>>> {
        RwLock::new(HashMap::with_capacity(256))
    };
    TEMPLATE_CACHE.get_or_init(make_empty)
}
// Empties the result cache.
//
// A poisoned lock is recovered rather than unwrapped: clearing the map is safe
// whatever state a panicking writer left behind, and panicking here would make
// the cache impossible to reset for the rest of the process.
#[pyfunction]
fn clear_cache(_py: Python<'_>) {
    get_result_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .clear();
}
// Empties the file, template and result caches, in that order.
//
// Each lock is taken and released on its own. Poisoned locks are recovered
// instead of unwrapped so that one poisoned cache cannot prevent the remaining
// caches from being cleared.
#[pyfunction]
fn clear_all_caches(_py: Python<'_>) {
    get_file_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .clear();
    get_template_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .clear();
    get_result_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .clear();
}
/// Removes the entry keyed by `path` from the file, template and result caches.
///
/// Each lock is taken and released on its own. Poisoned locks are recovered
/// instead of unwrapped: removing a stale entry is always safe, and a panic here
/// would leave the other caches still holding data for `path`.
fn invalidate_file(path: &str) {
    get_file_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .remove(path);
    get_template_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .remove(path);
    get_result_cache()
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .remove(path);
}
// Python-visible alias that forwards its arguments unchanged to `batch_open`
// and returns that function's result.
#[pyfunction]
fn _rust_batch_open(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
batch_open(py, filenames)
}
/// Copies a parsed file's stream info, extra fields and tags into `dict`.
///
/// Always writes `length`, `sample_rate` and `channels`; writes `bitrate` only
/// when present. Tags come from `pf.tags`, or — when that is empty and
/// `pf.lazy_vc` is set — from parsing the lazy Vorbis-comment bytes. The tag
/// names, in iteration order, are stored as a list under `_keys`.
///
/// Returns the first error raised by a dict insertion or value conversion.
#[inline(always)]
fn preserialized_to_flat_dict(py: Python<'_>, pf: &PreSerializedFile, dict: &Bound<'_, PyDict>) -> PyResult<()> {
    dict.set_item(pyo3::intern!(py, "length"), pf.length)?;
    dict.set_item(pyo3::intern!(py, "sample_rate"), pf.sample_rate)?;
    dict.set_item(pyo3::intern!(py, "channels"), pf.channels)?;
    if let Some(rate) = pf.bitrate {
        dict.set_item(pyo3::intern!(py, "bitrate"), rate)?;
    }

    for (name, raw) in &pf.extra {
        let converted = batch_value_to_py(py, raw)?;
        dict.set_item(*name, converted)?;
    }

    // Parse the deferred comment block only when no eager tags exist.
    let parsed_lazily = match &pf.lazy_vc {
        Some(vc_bytes) if pf.tags.is_empty() => Some(parse_vc_to_batch_tags(vc_bytes)),
        _ => None,
    };
    let tags = parsed_lazily.as_ref().unwrap_or(&pf.tags);

    let mut keys: Vec<&str> = Vec::with_capacity(tags.len());
    for (key, value) in tags {
        let name = key.as_str();
        dict.set_item(name, batch_value_to_py(py, value)?)?;
        keys.push(name);
    }
    dict.set_item(pyo3::intern!(py, "_keys"), PyList::new(py, &keys)?)?;
    Ok(())
}
/// Returns `true` when `a` and `b` have the same length and are equal byte for
/// byte once ASCII letters are compared without regard to case. Non-ASCII bytes
/// must match exactly.
#[inline(always)]
#[allow(dead_code)]
fn eq_ascii_ci(a: &[u8], b: &[u8]) -> bool {
    a.eq_ignore_ascii_case(b)
}
/// Converts a raw comment key into a Python string with ASCII letters lower-cased.
///
/// Returns `None` when the (lower-cased) bytes are not valid UTF-8 or the Python
/// string cannot be created.
///
/// Keys of any length are converted in full. The previous implementation copied
/// mixed-case keys through a 128-byte stack buffer and silently truncated longer
/// ones, while all-lowercase keys were passed through untruncated.
#[inline(always)]
#[allow(dead_code)]
fn vc_key_to_py<'py>(py: Python<'py>, key_bytes: &[u8]) -> Option<Bound<'py, PyAny>> {
    // Allocate a lower-cased copy only when the key actually contains uppercase.
    let lowered;
    let normalized: &[u8] = if key_bytes.iter().any(|b| b.is_ascii_uppercase()) {
        lowered = key_bytes.to_ascii_lowercase();
        &lowered
    } else {
        key_bytes
    };
    let text = std::str::from_utf8(normalized).ok()?;
    text.into_pyobject(py).ok().map(|o| o.into_any())
}
/// Parses a Vorbis-comment style block and groups its values by key.
///
/// Layout read from `data`: a little-endian `u32` vendor length, the vendor
/// bytes, a little-endian `u32` comment count, then for each comment a
/// little-endian `u32` length followed by `KEY=value` bytes.
///
/// Returns `(key, values)` pairs in first-seen order. Keys are compared with
/// ASCII case-insensitivity and the first spelling encountered is kept. Entries
/// without `=` or whose value is not valid UTF-8 are skipped. A truncated block
/// yields whatever was parsed before the truncation; a block too short to hold
/// the header yields an empty vector.
///
/// All offset arithmetic is overflow-checked, so a hostile length field cannot
/// wrap `usize` on 32-bit targets and slip past the bounds checks.
#[inline(always)]
#[allow(dead_code)]
fn parse_vc_grouped<'a>(data: &'a [u8]) -> Vec<(&'a [u8], Vec<&'a str>)> {
    // Reads a little-endian u32 at `at`; `None` when it would run past the buffer.
    fn read_u32_le(data: &[u8], at: usize) -> Option<usize> {
        let end = at.checked_add(4)?;
        let b = data.get(at..end)?;
        Some(u32::from_le_bytes([b[0], b[1], b[2], b[3]]) as usize)
    }

    if data.len() < 8 {
        return Vec::new();
    }
    let Some(vendor_len) = read_u32_le(data, 0) else { return Vec::new() };
    let Some(count_at) = 4usize.checked_add(vendor_len).filter(|&p| p <= data.len()) else {
        return Vec::new();
    };
    let Some(count) = read_u32_le(data, count_at) else { return Vec::new() };
    let mut pos = count_at + 4;

    // Cap the pre-allocation: `count` comes straight from untrusted input.
    let mut groups: Vec<(&[u8], Vec<&str>)> = Vec::with_capacity(count.min(32));
    for _ in 0..count {
        let Some(clen) = read_u32_le(data, pos) else { break };
        pos += 4;
        let Some(end) = pos.checked_add(clen).filter(|&e| e <= data.len()) else { break };
        let raw = &data[pos..end];
        pos = end;

        let Some(eq_pos) = memchr::memchr(b'=', raw) else { continue };
        let key = &raw[..eq_pos];
        let Ok(value) = std::str::from_utf8(&raw[eq_pos + 1..]) else { continue };

        match groups.iter_mut().find(|(k, _)| eq_ascii_ci(k, key)) {
            Some(group) => group.1.push(value),
            None => groups.push((key, vec![value])),
        }
    }
    groups
}
/// Writes grouped comment values into `dict` as `lower-cased key -> list[str]`.
///
/// For every group the key is lower-cased (ASCII only), a Python list of the
/// values is built and stored under that key, and one owned reference to the key
/// object is appended to `keys_out`; the caller is responsible for releasing those
/// references. Groups whose key is not valid UTF-8 are skipped.
///
/// Changes from the previous implementation:
/// * mixed-case keys longer than 128 bytes are no longer silently truncated;
/// * allocation failures (key, list or value) and a failed dict insertion now
///   release what was allocated and return the pending Python error instead of
///   dereferencing NULL or leaving an exception set while reporting success.
#[inline(always)]
#[allow(dead_code)]
fn emit_vc_groups_to_dict<'py>(
    _py: Python<'py>,
    groups: &[(&[u8], Vec<&str>)],
    dict: &Bound<'py, PyDict>,
    keys_out: &mut Vec<*mut pyo3::ffi::PyObject>,
) -> PyResult<()> {
    let py = dict.py();
    let dict_ptr = dict.as_ptr();
    for (key_bytes, values) in groups {
        // Allocate a lower-cased copy only when the key contains uppercase.
        let lowered;
        let normalized: &[u8] = if key_bytes.iter().any(|b| b.is_ascii_uppercase()) {
            lowered = key_bytes.to_ascii_lowercase();
            &lowered
        } else {
            *key_bytes
        };
        let Ok(key_text) = std::str::from_utf8(normalized) else { continue };

        // SAFETY: `dict` proves the interpreter is attached; every pointer is
        // NULL-checked before use and each owned reference is released or handed
        // over exactly once on every path.
        unsafe {
            let key_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
                key_text.as_ptr() as *const std::ffi::c_char,
                key_text.len() as pyo3::ffi::Py_ssize_t,
            );
            if key_ptr.is_null() {
                return Err(pyo3::PyErr::fetch(py));
            }

            let list_ptr = pyo3::ffi::PyList_New(values.len() as pyo3::ffi::Py_ssize_t);
            if list_ptr.is_null() {
                pyo3::ffi::Py_DECREF(key_ptr);
                return Err(pyo3::PyErr::fetch(py));
            }

            for (i, &value) in values.iter().enumerate() {
                let val_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
                    value.as_ptr() as *const std::ffi::c_char,
                    value.len() as pyo3::ffi::Py_ssize_t,
                );
                if val_ptr.is_null() {
                    // Remaining slots are NULL, which list deallocation tolerates.
                    pyo3::ffi::Py_DECREF(list_ptr);
                    pyo3::ffi::Py_DECREF(key_ptr);
                    return Err(pyo3::PyErr::fetch(py));
                }
                // PyList_SET_ITEM steals the reference to `val_ptr`.
                pyo3::ffi::PyList_SET_ITEM(list_ptr, i as pyo3::ffi::Py_ssize_t, val_ptr);
            }

            let rc = pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list_ptr);
            pyo3::ffi::Py_DECREF(list_ptr);
            if rc != 0 {
                pyo3::ffi::Py_DECREF(key_ptr);
                return Err(pyo3::PyErr::fetch(py));
            }
            // Ownership of the key reference passes to the caller via `keys_out`.
            keys_out.push(key_ptr);
        }
    }
    Ok(())
}
/// Stores the collected key objects as a Python list under `dict["_keys"]`.
///
/// Takes ownership of one reference to every pointer in `key_ptrs`; on return,
/// whether success or error, none of those references is still owned by the caller.
///
/// Changes from the previous implementation:
/// * a failed list allocation or dict insertion now releases the keys and returns
///   the pending Python error instead of writing through a NULL list;
/// * NULL entries in `key_ptrs` are skipped instead of being dereferenced;
/// * the caller's references are handed straight to the list (`PyList_SET_ITEM`
///   steals them), replacing the former INCREF-then-DECREF pair per key.
#[inline(always)]
fn set_keys_list(
    py: Python<'_>,
    dict: &Bound<'_, PyDict>,
    key_ptrs: Vec<*mut pyo3::ffi::PyObject>,
) -> PyResult<()> {
    // SAFETY: `py` proves the interpreter is attached; every non-NULL pointer in
    // `key_ptrs` is an owned reference per this function's contract, and each is
    // consumed exactly once below.
    unsafe {
        let live = key_ptrs.iter().filter(|p| !p.is_null()).count();
        let keys_list = pyo3::ffi::PyList_New(live as pyo3::ffi::Py_ssize_t);
        if keys_list.is_null() {
            for key_ptr in key_ptrs {
                pyo3::ffi::Py_XDECREF(key_ptr);
            }
            return Err(pyo3::PyErr::fetch(py));
        }

        let mut slot: pyo3::ffi::Py_ssize_t = 0;
        for key_ptr in key_ptrs {
            if key_ptr.is_null() {
                continue;
            }
            // The list takes over the caller's reference.
            pyo3::ffi::PyList_SET_ITEM(keys_list, slot, key_ptr);
            slot += 1;
        }

        let keys_key = pyo3::intern!(py, "_keys");
        let rc = pyo3::ffi::PyDict_SetItem(dict.as_ptr(), keys_key.as_ptr(), keys_list);
        pyo3::ffi::Py_DECREF(keys_list);
        if rc != 0 {
            return Err(pyo3::PyErr::fetch(py));
        }
    }
    Ok(())
}
use std::cell::RefCell;
// Per-thread cache of Python string objects for short tag keys, indexed by the
// key bytes zero-padded to 8 bytes. `intern_tag_key` takes an extra reference on
// each object before inserting it here.
// NOTE(review): nothing visible in this part of the file removes entries or releases
// those references — presumably they live for the lifetime of the thread; confirm.
thread_local! {
static TAG_KEY_INTERN: RefCell<HashMap<[u8; 8], *mut pyo3::ffi::PyObject>> = RefCell::new(HashMap::with_capacity(64));
}
/// Returns a new (owned) reference to a Python string for `key`, reusing a
/// per-thread cached object for short keys.
///
/// Keys longer than 8 bytes, and keys containing a NUL byte, are always created
/// fresh. The cache is indexed by the key zero-padded to 8 bytes, so a key with an
/// embedded or trailing NUL would be indistinguishable from a shorter key (for
/// example `b"AB"` and `b"AB\0"`) and could return the wrong string; bypassing the
/// cache for such keys removes that collision.
///
/// Returns NULL, with a Python error set, when the string cannot be created
/// (for example when `key` is not valid UTF-8).
///
/// # Safety
/// The caller must be attached to the Python interpreter, and must release the
/// returned reference exactly once.
#[inline(always)]
unsafe fn intern_tag_key(key: &[u8]) -> *mut pyo3::ffi::PyObject {
    if key.len() > 8 || key.contains(&0) {
        return pyo3::ffi::PyUnicode_FromStringAndSize(
            key.as_ptr() as *const std::ffi::c_char,
            key.len() as pyo3::ffi::Py_ssize_t,
        );
    }

    let mut padded = [0u8; 8];
    padded[..key.len()].copy_from_slice(key);

    TAG_KEY_INTERN.with(|cell| {
        let mut interned = cell.borrow_mut();
        if let Some(&cached) = interned.get(&padded) {
            // Hand out a fresh reference; the cache keeps its own.
            pyo3::ffi::Py_INCREF(cached);
            return cached;
        }
        let created = pyo3::ffi::PyUnicode_FromStringAndSize(
            key.as_ptr() as *const std::ffi::c_char,
            key.len() as pyo3::ffi::Py_ssize_t,
        );
        if !created.is_null() {
            // One reference for the cache, one for the caller.
            pyo3::ffi::Py_INCREF(created);
            interned.insert(padded, created);
        }
        created
    })
}
/// Stores `val` as a Python float under `key` in `dict`.
///
/// If the float object cannot be allocated the pending Python error is cleared
/// and the entry is skipped, matching how the comment parsers in this file treat
/// per-entry allocation failures. Previously the NULL result was passed on to
/// `PyDict_SetItem` and `Py_DECREF`.
///
/// # Safety
/// The caller must be attached to the Python interpreter; `dict` must point to a
/// live dict and `key` to a live hashable object.
#[inline(always)]
unsafe fn set_dict_f64(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: f64) {
    let v = pyo3::ffi::PyFloat_FromDouble(val);
    if v.is_null() {
        pyo3::ffi::PyErr_Clear();
        return;
    }
    pyo3::ffi::PyDict_SetItem(dict, key, v);
    pyo3::ffi::Py_DECREF(v);
}
/// Stores `val` as a Python int under `key` in `dict`.
///
/// If the int object cannot be allocated the pending Python error is cleared and
/// the entry is skipped, matching how the comment parsers in this file treat
/// per-entry allocation failures. Previously the NULL result was passed on to
/// `PyDict_SetItem` and `Py_DECREF`.
///
/// # Safety
/// The caller must be attached to the Python interpreter; `dict` must point to a
/// live dict and `key` to a live hashable object.
#[inline(always)]
unsafe fn set_dict_u32(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: u32) {
    let v = pyo3::ffi::PyLong_FromUnsignedLong(val as std::ffi::c_ulong);
    if v.is_null() {
        pyo3::ffi::PyErr_Clear();
        return;
    }
    pyo3::ffi::PyDict_SetItem(dict, key, v);
    pyo3::ffi::Py_DECREF(v);
}
/// Stores `val` as a Python int under `key` in `dict`.
///
/// If the int object cannot be allocated the pending Python error is cleared and
/// the entry is skipped, matching how the comment parsers in this file treat
/// per-entry allocation failures. Previously the NULL result was passed on to
/// `PyDict_SetItem` and `Py_DECREF`.
///
/// # Safety
/// The caller must be attached to the Python interpreter; `dict` must point to a
/// live dict and `key` to a live hashable object.
#[inline(always)]
unsafe fn set_dict_i64(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: i64) {
    let v = pyo3::ffi::PyLong_FromLongLong(val);
    if v.is_null() {
        pyo3::ffi::PyErr_Clear();
        return;
    }
    pyo3::ffi::PyDict_SetItem(dict, key, v);
    pyo3::ffi::Py_DECREF(v);
}
/// Stores Python `True` or `False` under `key` in `dict`, according to `val`.
///
/// # Safety
/// The caller must be attached to the Python interpreter; `dict` must point to a
/// live dict and `key` to a live hashable object.
#[inline(always)]
unsafe fn set_dict_bool(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: bool) {
    let singleton = match val {
        true => pyo3::ffi::Py_True(),
        false => pyo3::ffi::Py_False(),
    };
    // Hold a temporary reference across the insertion, then give it back.
    pyo3::ffi::Py_INCREF(singleton);
    pyo3::ffi::PyDict_SetItem(dict, key, singleton);
    pyo3::ffi::Py_DECREF(singleton);
}
/// Stores `val` as a Python str under `key` in `dict`.
///
/// If the string object cannot be allocated the pending Python error is cleared
/// and the entry is skipped, matching how the comment parsers in this file treat
/// per-entry allocation failures. Previously the NULL result was passed on to
/// `PyDict_SetItem` and `Py_DECREF`.
///
/// # Safety
/// The caller must be attached to the Python interpreter; `dict` must point to a
/// live dict and `key` to a live hashable object.
#[inline(always)]
#[allow(dead_code)]
unsafe fn set_dict_str(dict: *mut pyo3::ffi::PyObject, key: *mut pyo3::ffi::PyObject, val: &str) {
    let v = pyo3::ffi::PyUnicode_FromStringAndSize(
        val.as_ptr() as *const std::ffi::c_char,
        val.len() as pyo3::ffi::Py_ssize_t,
    );
    if v.is_null() {
        pyo3::ffi::PyErr_Clear();
        return;
    }
    pyo3::ffi::PyDict_SetItem(dict, key, v);
    pyo3::ffi::Py_DECREF(v);
}
/// Fast path that turns the payload of a text frame into a Python string.
///
/// `data[0]` is the encoding byte and the rest is the text. Trailing NUL bytes are
/// stripped first. Only encoding `0` (decoded as Latin-1 when any byte is >= 128,
/// otherwise passed through as-is) and encoding `3` (must be valid UTF-8) are
/// handled here.
///
/// Returns `None` for empty input, text that is empty after stripping, any other
/// encoding byte, invalid UTF-8 under encoding `3`, or a failed string allocation.
/// On success the pointer is a new reference owned by the caller.
///
/// # Safety
/// The caller must be attached to the Python interpreter.
#[inline(always)]
unsafe fn try_text_frame_to_py(data: &[u8]) -> Option<*mut pyo3::ffi::PyObject> {
    let (&enc, payload) = data.split_first()?;

    // Number of bytes to keep once trailing NULs are removed.
    let keep = payload.iter().rposition(|&b| b != 0).map_or(0, |last| last + 1);
    if keep == 0 {
        return None;
    }
    let text = &payload[..keep];

    let ptr = match enc {
        0 if text.iter().any(|&b| b >= 128) => pyo3::ffi::PyUnicode_DecodeLatin1(
            text.as_ptr() as *const std::ffi::c_char,
            text.len() as pyo3::ffi::Py_ssize_t,
            std::ptr::null(),
        ),
        0 => pyo3::ffi::PyUnicode_FromStringAndSize(
            text.as_ptr() as *const std::ffi::c_char,
            text.len() as pyo3::ffi::Py_ssize_t,
        ),
        3 => {
            if std::str::from_utf8(text).is_err() {
                return None;
            }
            pyo3::ffi::PyUnicode_FromStringAndSize(
                text.as_ptr() as *const std::ffi::c_char,
                text.len() as pyo3::ffi::Py_ssize_t,
            )
        }
        _ => return None,
    };

    if ptr.is_null() {
        None
    } else {
        Some(ptr)
    }
}
/// Resolves one genre string against the `GENRES` table.
///
/// * `"(N)suffix"` with a non-empty suffix yields the suffix;
/// * `"(N)"` yields `GENRES[N]` when `N` is in range;
/// * a bare number `"N"` yields `GENRES[N]` when `N` is in range;
/// * anything else is returned unchanged.
fn resolve_tcon_genre_single(text: &str) -> String {
    let genres = crate::id3::specs::GENRES;

    // "(N)..." form: the number between the leading '(' and the first ')'.
    let parenthesised = text
        .strip_prefix('(')
        .and_then(|rest| rest.split_once(')'))
        .and_then(|(inner, suffix)| inner.parse::<usize>().ok().map(|n| (n, suffix)));
    if let Some((index, suffix)) = parenthesised {
        if !suffix.is_empty() {
            return suffix.to_string();
        }
        if index < genres.len() {
            return genres[index].to_string();
        }
    }

    // Bare numeric form.
    match text.parse::<usize>() {
        Ok(index) if index < genres.len() => genres[index].to_string(),
        _ => text.to_string(),
    }
}
fn resolve_tcon_genre(text: &str) -> String {
if text.contains('\0') {
for part in text.split('\0') {
if !part.is_empty() {
return resolve_tcon_genre_single(part);
}
}
}
resolve_tcon_genre_single(text)
}
/// Walks frames with a 6-byte header (3-byte ID, 3-byte big-endian size) in
/// `tag_bytes`, starting at `*offset`, and writes their values into the Python dict
/// `dict_ptr`.
///
/// `*offset` is advanced past every frame consumed. For each key newly inserted
/// into the dict, one owned reference to the key object is pushed onto `key_ptrs`;
/// the caller must release those references.
///
/// Walking stops at a zero byte where a frame ID is expected, when fewer than
/// 6 bytes remain, or when a frame's declared size runs past the buffer.
#[inline(always)]
fn fast_walk_v22_frames(
py: Python<'_>, tag_bytes: &[u8], offset: &mut usize,
dict_ptr: *mut pyo3::ffi::PyObject, key_ptrs: &mut Vec<*mut pyo3::ffi::PyObject>,
) {
while *offset + 6 <= tag_bytes.len() {
// A zero byte where an ID should start ends the walk.
if tag_bytes[*offset] == 0 { break; }
let id_bytes = &tag_bytes[*offset..*offset+3];
if !id_bytes.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) {
// Resynchronise: skip forward one byte at a time until something that looks
// like a frame ID (uppercase letters / digits) or a zero byte is found.
*offset += 1;
while *offset + 6 <= tag_bytes.len() {
if tag_bytes[*offset] == 0 { break; }
let next_id = &tag_bytes[*offset..*offset+3];
if next_id.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) { break; }
*offset += 1;
}
continue;
}
// 24-bit big-endian frame size.
let size = ((tag_bytes[*offset+3] as usize) << 16)
| ((tag_bytes[*offset+4] as usize) << 8)
| (tag_bytes[*offset+5] as usize);
*offset += 6;
if size == 0 { continue; }
if *offset + size > tag_bytes.len() { break; }
let frame_data = &tag_bytes[*offset..*offset+size];
*offset += size;
// Picture frames use their own parser.
if id_bytes == b"PIC" {
if let Ok(frame) = id3::frames::parse_v22_picture_frame(frame_data) {
let key = frame.hash_key();
let py_val = frame_to_py(py, &frame);
unsafe {
let key_ptr = intern_tag_key(key.as_str().as_bytes());
// NOTE(review): unlike the generic path at the end of this loop, this
// does not check whether the key is already present, so a repeated
// key is overwritten and recorded in `key_ptrs` again.
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
key_ptrs.push(key_ptr);
}
}
continue;
}
let id_str = std::str::from_utf8(id_bytes).unwrap_or("XXX");
// Map the 3-character ID to its 4-character equivalent; unmapped IDs are skipped.
let v24_id = match id3::frames::convert_v22_frame_id(id_str) {
Some(id) => id,
None => continue,
};
// Fast path for text frames other than TXXX / TIPL / TMCL / IPLS.
if v24_id.as_bytes()[0] == b'T' && v24_id != "TXXX" && v24_id != "TIPL" && v24_id != "TMCL" && v24_id != "IPLS" {
unsafe {
if let Some(py_ptr) = try_text_frame_to_py(frame_data) {
let v24_bytes = v24_id.as_bytes();
// Genre frames are passed through `resolve_tcon_genre`.
let final_ptr = if v24_bytes == b"TCON" {
let py_str = pyo3::ffi::PyUnicode_AsUTF8(py_ptr);
if !py_str.is_null() {
// Read as a C string, so the text ends at the first NUL byte.
let s = std::ffi::CStr::from_ptr(py_str).to_string_lossy();
let resolved = resolve_tcon_genre(&s);
let r = resolved.as_bytes();
let new_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
r.as_ptr() as *const std::ffi::c_char,
r.len() as pyo3::ffi::Py_ssize_t);
pyo3::ffi::Py_DECREF(py_ptr);
new_ptr
} else { py_ptr }
} else { py_ptr };
if final_ptr.is_null() { continue; }
// TYER is stored under the TDRC key, and dropped when TDRC already exists.
let is_tyer = v24_bytes == b"TYER";
let key_ptr = if is_tyer {
let tdrc_key = intern_tag_key(b"TDRC");
if pyo3::ffi::PyDict_Contains(dict_ptr, tdrc_key) != 0 {
pyo3::ffi::Py_DECREF(tdrc_key);
pyo3::ffi::Py_DECREF(final_ptr);
continue;
}
tdrc_key
} else {
intern_tag_key(v24_bytes)
};
// PyDict_GetItem returns a borrowed reference (or NULL when absent).
let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
if existing.is_null() {
// First value for this key: store it; the key reference moves to `key_ptrs`.
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, final_ptr);
key_ptrs.push(key_ptr);
} else {
// Repeated key: accumulate the values in a list.
if pyo3::ffi::PyList_Check(existing) != 0 {
pyo3::ffi::PyList_Append(existing, final_ptr);
} else {
// PyList_SET_ITEM steals a reference, hence the INCREFs first.
let list = pyo3::ffi::PyList_New(2);
pyo3::ffi::Py_INCREF(existing);
pyo3::ffi::PyList_SET_ITEM(list, 0, existing);
pyo3::ffi::Py_INCREF(final_ptr);
pyo3::ffi::PyList_SET_ITEM(list, 1, final_ptr);
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list);
pyo3::ffi::Py_DECREF(list);
}
pyo3::ffi::Py_DECREF(key_ptr);
}
pyo3::ffi::Py_DECREF(final_ptr);
continue;
}
}
}
// Generic path: full frame parse; the first value seen for a key wins.
if let Ok(frame) = id3::frames::parse_frame(v24_id, frame_data) {
let key = frame.hash_key();
let py_val = frame_to_py(py, &frame);
unsafe {
let key_ptr = intern_tag_key(key.as_str().as_bytes());
if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
key_ptrs.push(key_ptr);
} else {
pyo3::ffi::Py_DECREF(key_ptr);
}
}
}
}
}
/// Walks frames with a 10-byte header (4-byte ID, 4-byte size, 2-byte flags) in
/// `tag_bytes`, starting at `*offset`, and writes their values into the Python dict
/// `dict_ptr`.
///
/// `version` selects how the flag bits are interpreted (`4` versus anything else)
/// and `bpi` is forwarded to `BitPaddedInt::decode` when reading the frame size.
/// `*offset` is advanced past every frame consumed. For each key newly inserted
/// into the dict, one owned reference to the key object is pushed onto `key_ptrs`;
/// the caller must release those references.
///
/// Walking stops at a zero byte where a frame ID is expected, when fewer than
/// 10 bytes remain, or when a frame's declared size runs past the buffer.
#[inline(always)]
fn fast_walk_v2x_frames(
py: Python<'_>, tag_bytes: &[u8], offset: &mut usize, version: u8, bpi: u8,
dict_ptr: *mut pyo3::ffi::PyObject, key_ptrs: &mut Vec<*mut pyo3::ffi::PyObject>,
) {
while *offset + 10 <= tag_bytes.len() {
// A zero byte where an ID should start ends the walk.
if tag_bytes[*offset] == 0 { break; }
let id_bytes = &tag_bytes[*offset..*offset+4];
if !id_bytes.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) {
// Resynchronise: skip forward one byte at a time until something that looks
// like a frame ID (uppercase letters / digits) or a zero byte is found.
*offset += 1;
while *offset + 10 <= tag_bytes.len() {
if tag_bytes[*offset] == 0 { break; }
let next_id = &tag_bytes[*offset..*offset+4];
if next_id.iter().all(|&b| b.is_ascii_uppercase() || b.is_ascii_digit()) { break; }
*offset += 1;
}
continue;
}
let size = id3::header::BitPaddedInt::decode(&tag_bytes[*offset+4..*offset+8], bpi) as usize;
let flags = u16::from_be_bytes([tag_bytes[*offset+8], tag_bytes[*offset+9]]);
*offset += 10;
if size == 0 { continue; }
if *offset + size > tag_bytes.len() { break; }
// Flag layout depends on `version`. In the non-4 branch both `compressed` and
// `has_data_length` are taken from bit 0x0080, and `unsynchronised` is never set.
let (compressed, encrypted, unsynchronised, has_data_length) = if version == 4 {
(flags & 0x0008 != 0, flags & 0x0004 != 0, flags & 0x0002 != 0, flags & 0x0001 != 0)
} else {
(flags & 0x0080 != 0, flags & 0x0040 != 0, false, flags & 0x0080 != 0)
};
let id_str = std::str::from_utf8(id_bytes).unwrap_or("XXXX");
if !encrypted && !compressed && !unsynchronised && !has_data_length {
// Plain frame: the payload can be used in place without copying.
let frame_data = &tag_bytes[*offset..*offset+size];
*offset += size;
// Fast path for text frames other than TXXX / TIPL / TMCL / IPLS.
if id_bytes[0] == b'T' && id_str != "TXXX" && id_str != "TIPL" && id_str != "TMCL" && id_str != "IPLS" {
unsafe {
if let Some(py_ptr) = try_text_frame_to_py(frame_data) {
// Genre frames are passed through `resolve_tcon_genre`.
let final_ptr = if id_bytes == b"TCON" {
let py_str = pyo3::ffi::PyUnicode_AsUTF8(py_ptr);
if !py_str.is_null() {
// Read as a C string, so the text ends at the first NUL byte.
let s = std::ffi::CStr::from_ptr(py_str).to_string_lossy();
let resolved = resolve_tcon_genre(&s);
let r = resolved.as_bytes();
let new_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
r.as_ptr() as *const std::ffi::c_char,
r.len() as pyo3::ffi::Py_ssize_t);
pyo3::ffi::Py_DECREF(py_ptr);
new_ptr
} else { py_ptr }
} else { py_ptr };
if final_ptr.is_null() { continue; }
// TYER is stored under the TDRC key, and dropped when TDRC already exists.
let is_tyer = id_bytes == b"TYER";
let key_ptr = if is_tyer {
let tdrc_key = intern_tag_key(b"TDRC");
if pyo3::ffi::PyDict_Contains(dict_ptr, tdrc_key) != 0 {
pyo3::ffi::Py_DECREF(tdrc_key);
pyo3::ffi::Py_DECREF(final_ptr);
continue;
}
tdrc_key
} else {
intern_tag_key(id_bytes)
};
// PyDict_GetItem returns a borrowed reference (or NULL when absent).
let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
if existing.is_null() {
// First value for this key: store it; the key reference moves to `key_ptrs`.
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, final_ptr);
key_ptrs.push(key_ptr);
} else {
// Repeated key: accumulate the values in a list.
if pyo3::ffi::PyList_Check(existing) != 0 {
pyo3::ffi::PyList_Append(existing, final_ptr);
} else {
// PyList_SET_ITEM steals a reference, hence the INCREFs first.
let list = pyo3::ffi::PyList_New(2);
pyo3::ffi::Py_INCREF(existing);
pyo3::ffi::PyList_SET_ITEM(list, 0, existing);
pyo3::ffi::Py_INCREF(final_ptr);
pyo3::ffi::PyList_SET_ITEM(list, 1, final_ptr);
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list);
pyo3::ffi::Py_DECREF(list);
}
pyo3::ffi::Py_DECREF(key_ptr);
}
pyo3::ffi::Py_DECREF(final_ptr);
continue;
}
}
}
// Fast path for W*** frames other than WXXX whose payload is pure ASCII
// once trailing NUL bytes are stripped; the first value for a key wins.
if id_bytes[0] == b'W' && id_str != "WXXX" {
let mut flen = frame_data.len();
while flen > 0 && frame_data[flen-1] == 0 { flen -= 1; }
if flen > 0 && frame_data[..flen].iter().all(|&b| b < 128) {
unsafe {
let py_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
frame_data.as_ptr() as *const std::ffi::c_char, flen as pyo3::ffi::Py_ssize_t);
if !py_ptr.is_null() {
let key_ptr = intern_tag_key(id_bytes);
if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_ptr);
key_ptrs.push(key_ptr);
} else {
pyo3::ffi::Py_DECREF(key_ptr);
}
pyo3::ffi::Py_DECREF(py_ptr);
continue;
}
}
}
}
// Everything else goes through the general frame parser.
if let Ok(frame) = id3::frames::parse_frame(id_str, frame_data) {
emit_frame_to_dict(py, &frame, id_str, dict_ptr, key_ptrs);
}
} else {
// Flagged frame: work on an owned copy that the steps below may replace.
let mut frame_data = tag_bytes[*offset..*offset+size].to_vec();
*offset += size;
// Encrypted frames are skipped.
if encrypted { continue; }
// Drop the 4 leading bytes when the data-length flag is set.
if has_data_length && frame_data.len() >= 4 {
frame_data = frame_data[4..].to_vec();
}
if unsynchronised {
frame_data = match id3::unsynch::decode(&frame_data) {
Ok(d) => d,
Err(_) => continue,
};
}
if compressed {
frame_data = match id3::tags::decompress_zlib(&frame_data) {
Ok(d) => d,
Err(_) => continue,
};
}
if let Ok(frame) = id3::frames::parse_frame(id_str, &frame_data) {
emit_frame_to_dict(py, &frame, id_str, dict_ptr, key_ptrs);
}
}
}
}
/// Converts a parsed frame to a Python value and stores it in the dict `dict_ptr`.
///
/// * For `TCON` text frames every text value is first passed through
///   `resolve_tcon_genre`.
/// * The dict key is `frame.hash_key()`, except that `TYER` is stored under `TDRC`
///   and dropped entirely when `TDRC` is already present.
/// * When the key is new, the value is inserted and one owned reference to the key
///   object is pushed onto `key_ptrs` (the caller must release it).
/// * When the key already exists, text frames have their values merged into a list
///   under that key; for any other frame kind the existing value is kept.
fn emit_frame_to_dict(
py: Python<'_>, frame: &id3::frames::Frame, id_str: &str,
dict_ptr: *mut pyo3::ffi::PyObject, key_ptrs: &mut Vec<*mut pyo3::ffi::PyObject>,
) {
// Declared up front so a rebuilt frame outlives the `actual_frame` borrow.
let frame_ref;
let resolved_frame;
let actual_frame = if id_str == "TCON" {
if let id3::frames::Frame::Text(tf) = frame {
let resolved_text: Vec<String> = tf.text.iter()
.map(|t| resolve_tcon_genre(t))
.collect();
resolved_frame = id3::frames::Frame::Text(id3::frames::TextFrame {
id: tf.id.clone(),
encoding: tf.encoding,
text: resolved_text,
});
frame_ref = &resolved_frame;
frame_ref
} else { frame }
} else { frame };
let key = actual_frame.hash_key();
let py_val = frame_to_py(py, actual_frame);
unsafe {
let key_ptr = if id_str == "TYER" {
let tdrc_key = intern_tag_key(b"TDRC");
if pyo3::ffi::PyDict_Contains(dict_ptr, tdrc_key) != 0 {
pyo3::ffi::Py_DECREF(tdrc_key);
return; }
tdrc_key
} else {
intern_tag_key(key.as_str().as_bytes())
};
// PyDict_GetItem returns a borrowed reference (or NULL when absent).
let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
if existing.is_null() {
// New key: store the value; the key reference moves to `key_ptrs`.
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
key_ptrs.push(key_ptr);
} else {
// Existing key: only text frames are merged.
if let id3::frames::Frame::Text(_) = actual_frame {
if pyo3::ffi::PyList_Check(existing) != 0 {
// Existing value is already a list: extend it (flattening a list
// value) or append the single value.
if pyo3::ffi::PyList_Check(py_val.as_ptr()) != 0 {
let n = pyo3::ffi::PyList_Size(py_val.as_ptr());
for i in 0..n {
let item = pyo3::ffi::PyList_GetItem(py_val.as_ptr(), i);
pyo3::ffi::PyList_Append(existing, item);
}
} else {
pyo3::ffi::PyList_Append(existing, py_val.as_ptr());
}
} else {
// Existing value is a scalar: build a fresh list holding the old value
// followed by the new one(s), then replace the dict entry with it.
let list = pyo3::ffi::PyList_New(0);
pyo3::ffi::PyList_Append(list, existing);
if pyo3::ffi::PyList_Check(py_val.as_ptr()) != 0 {
let n = pyo3::ffi::PyList_Size(py_val.as_ptr());
for i in 0..n {
let item = pyo3::ffi::PyList_GetItem(py_val.as_ptr(), i);
pyo3::ffi::PyList_Append(list, item);
}
} else {
pyo3::ffi::PyList_Append(list, py_val.as_ptr());
}
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list);
pyo3::ffi::Py_DECREF(list);
}
}
// The key was not handed to `key_ptrs`, so release it here.
pyo3::ffi::Py_DECREF(key_ptr);
}
}
}
/// Parses a Vorbis-comment style block straight into the Python dict `dict`.
///
/// Layout read from `data`: a little-endian `u32` vendor length, the vendor bytes,
/// a little-endian `u32` comment count, then for each comment a little-endian
/// `u32` length followed by `KEY=value` bytes.
///
/// Keys are ASCII-lower-cased; a key that occurs more than once ends up with a
/// list of its values. For each key newly inserted, one owned reference to the key
/// object is pushed onto `keys_out` (the caller must release it). Entries without
/// `=` and entries whose key or value cannot be turned into a Python string are
/// skipped. Truncated input stops parsing early. The function always returns
/// `Ok(())`.
#[inline(always)]
fn parse_vc_to_dict_direct<'py>(
_py: Python<'py>,
data: &[u8],
dict: &Bound<'py, PyDict>,
keys_out: &mut Vec<*mut pyo3::ffi::PyObject>,
) -> PyResult<()> {
// Too short to hold the vendor length plus the comment count.
if data.len() < 8 { return Ok(()); }
let mut pos = 0;
let vendor_len = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
pos += 4;
if pos + vendor_len > data.len() { return Ok(()); }
pos += vendor_len;
if pos + 4 > data.len() { return Ok(()); }
let count = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
pos += 4;
let dict_ptr = dict.as_ptr();
for _ in 0..count {
if pos + 4 > data.len() { break; }
let clen = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize;
pos += 4;
if pos + clen > data.len() { break; }
let raw = &data[pos..pos + clen];
pos += clen;
let eq_pos = match memchr::memchr(b'=', raw) {
Some(p) => p,
None => continue,
};
let key_bytes = &raw[..eq_pos];
let value_bytes = &raw[eq_pos + 1..];
unsafe {
// Lower-case the key into a stack buffer.
// NOTE(review): keys longer than 128 bytes are truncated here.
let mut buf = [0u8; 128];
let key_len = key_bytes.len().min(128);
for i in 0..key_len { buf[i] = key_bytes[i].to_ascii_lowercase(); }
let key_ptr = intern_tag_key(&buf[..key_len]);
if key_ptr.is_null() { pyo3::ffi::PyErr_Clear(); continue; }
let val_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
value_bytes.as_ptr() as *const std::ffi::c_char,
value_bytes.len() as pyo3::ffi::Py_ssize_t);
if val_ptr.is_null() {
// The value could not be turned into a str: drop the error and skip it.
pyo3::ffi::PyErr_Clear();
pyo3::ffi::Py_DECREF(key_ptr);
continue;
}
// PyDict_GetItem returns a borrowed reference (or NULL when absent).
let existing = pyo3::ffi::PyDict_GetItem(dict_ptr, key_ptr);
if !existing.is_null() {
// Repeated key: accumulate the values in a list.
if pyo3::ffi::PyList_Check(existing) != 0 {
pyo3::ffi::PyList_Append(existing, val_ptr);
pyo3::ffi::Py_DECREF(val_ptr); } else {
// PyList_SET_ITEM steals references: `existing` is INCREF'd first, while
// ownership of `val_ptr` moves into the list (hence no DECREF for it).
let list_ptr = pyo3::ffi::PyList_New(2);
pyo3::ffi::Py_INCREF(existing); pyo3::ffi::PyList_SET_ITEM(list_ptr, 0, existing);
pyo3::ffi::PyList_SET_ITEM(list_ptr, 1, val_ptr); pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, list_ptr);
pyo3::ffi::Py_DECREF(list_ptr);
}
pyo3::ffi::Py_DECREF(key_ptr);
} else {
// First value for this key: the key reference moves to `keys_out`.
pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, val_ptr);
pyo3::ffi::Py_DECREF(val_ptr);
keys_out.push(key_ptr);
}
}
}
Ok(())
}
/// Parses FLAC metadata from `data` directly into the Python dict `dict`.
///
/// Returns `Ok(false)`, leaving `dict` untouched, when `data` does not start with
/// `fLaC` (optionally preceded by an `ID3` tag) or no stream-info block could be
/// parsed. On success returns `Ok(true)` after writing `length`, `sample_rate`,
/// `channels`, `bits_per_sample`, `total_samples`, `bitrate`, the comment tags,
/// `_pictures` (only when picture blocks exist), `_keys` and `_format = "flac"`.
///
/// `file_size` is used only to estimate the bitrate from the bytes that follow
/// the metadata blocks.
#[inline(always)]
fn fast_read_flac_direct<'py>(py: Python<'py>, data: &[u8], file_size: usize, dict: &Bound<'py, PyDict>) -> PyResult<bool> {
// Locate the `fLaC` marker, skipping a leading ID3 tag when present.
let flac_offset = if data.len() >= 4 && &data[0..4] == b"fLaC" {
0
} else if data.len() >= 10 && &data[0..3] == b"ID3" {
let size = crate::id3::header::BitPaddedInt::syncsafe(&data[6..10]) as usize;
let off = 10 + size;
if off + 4 > data.len() || &data[off..off+4] != b"fLaC" { return Ok(false); }
off
} else {
return Ok(false);
};
let mut pos = flac_offset + 4;
let mut streaminfo: Option<flac::StreamInfo> = None;
let mut vc_data: Option<&[u8]> = None;
// (offset, size) of every picture block; parsed only after the scan.
let mut picture_blocks: Vec<(usize, usize)> = Vec::new();
// Metadata block scan: 1 header byte (top bit = last block, low 7 bits = type)
// followed by a 24-bit big-endian block size.
loop {
if pos + 4 > data.len() { break; }
let header = data[pos];
let is_last = header & 0x80 != 0;
let bt = header & 0x7F;
let block_size = ((data[pos+1] as usize) << 16) | ((data[pos+2] as usize) << 8) | (data[pos+3] as usize);
pos += 4;
if pos + block_size > data.len() { break; }
match bt {
// Type 0: stream info.
0 => {
if let Ok(si) = flac::StreamInfo::parse(&data[pos..pos+block_size]) {
streaminfo = Some(si);
}
}
// Type 4: comment block. The size is recomputed from the content, falling
// back to the declared block size, and clamped to the end of `data`.
4 => {
let vc_size = flac::compute_vc_data_size(&data[pos..]).unwrap_or(block_size);
let end = pos.saturating_add(vc_size).min(data.len());
vc_data = Some(&data[pos..end]);
}
// Type 6: picture.
6 => {
picture_blocks.push((pos, block_size));
}
_ => {}
}
pos += block_size;
if is_last { break; }
}
let si = match streaminfo {
Some(si) => si,
None => return Ok(false),
};
// Bitrate estimate: bytes after the metadata blocks, in bits, over the duration.
let audio_data_size = file_size.saturating_sub(pos);
let bitrate = if si.length > 0.0 {
(audio_data_size as f64 * 8.0 / si.length) as u32
} else { 0 };
let dict_ptr = dict.as_ptr();
unsafe {
set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), si.length);
set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), si.sample_rate);
set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), si.channels as u32);
set_dict_u32(dict_ptr, pyo3::intern!(py, "bits_per_sample").as_ptr(), si.bits_per_sample as u32);
set_dict_i64(dict_ptr, pyo3::intern!(py, "total_samples").as_ptr(), si.total_samples as i64);
set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), bitrate);
}
// Key references collected here are consumed by `set_keys_list` below.
let mut keys_out: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
if let Some(vc) = vc_data {
parse_vc_to_dict_direct(py, vc, dict, &mut keys_out)?;
}
if !picture_blocks.is_empty() {
let pics = PyList::empty(py);
for (pic_pos, pic_size) in &picture_blocks {
// Pictures that fail to parse are skipped; insertion errors are ignored.
if let Ok(pic) = flac::FLACPicture::parse(&data[*pic_pos..*pic_pos + *pic_size]) {
let d = PyDict::new(py);
let _ = d.set_item("type", pic.pic_type);
let _ = d.set_item("mime", &pic.mime);
let _ = d.set_item("desc", &pic.desc);
let _ = d.set_item("width", pic.width);
let _ = d.set_item("height", pic.height);
let _ = d.set_item("depth", pic.depth);
let _ = d.set_item("colors", pic.colors);
let _ = d.set_item("data", pyo3::types::PyBytes::new(py, &pic.data));
let _ = pics.append(d);
}
}
let _ = dict.set_item(pyo3::intern!(py, "_pictures"), pics);
}
set_keys_list(py, dict, keys_out)?;
unsafe {
// Tag the result with its format name.
let fmt = pyo3::ffi::PyUnicode_InternFromString(b"flac\0".as_ptr() as *const std::ffi::c_char);
pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
pyo3::ffi::Py_DECREF(fmt);
}
Ok(true)
}
/// Parses Ogg Vorbis metadata from `data` directly into the Python dict `dict`.
///
/// Reads the identification packet on the first page (channels, sample rate,
/// nominal bitrate) and the comment packet that starts on the second page.
/// On success returns `Ok(true)` after writing `length`, `sample_rate`,
/// `channels`, `bitrate`, the comment tags, `_keys` and `_format = "ogg"`.
///
/// Returns `Ok(false)` when `data` is not recognised as Ogg Vorbis or is
/// truncated. All structural validation, including the comment-packet marker,
/// now happens *before* anything is written, so `dict` is left untouched on
/// `Ok(false)`. Previously the stream-info keys were already written when a
/// malformed comment packet caused the `false` return.
#[inline(always)]
fn fast_read_ogg_direct<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    // --- First page: capture pattern, serial number, segment table. ---
    if data.len() < 58 || &data[0..4] != b"OggS" { return Ok(false); }
    let serial = u32::from_le_bytes([data[14], data[15], data[16], data[17]]);
    let num_seg = data[26] as usize;
    let seg_table_end = 27 + num_seg;
    if seg_table_end > data.len() { return Ok(false); }
    let page_data_size: usize = data[27..seg_table_end].iter().map(|&s| s as usize).sum();
    let first_page_end = seg_table_end + page_data_size;

    // --- Identification packet: needs at least 30 bytes. ---
    if seg_table_end + 30 > data.len() { return Ok(false); }
    let id_data = &data[seg_table_end..];
    if id_data.len() < 30 || &id_data[0..7] != b"\x01vorbis" { return Ok(false); }
    let channels = id_data[11];
    let sample_rate = u32::from_le_bytes([id_data[12], id_data[13], id_data[14], id_data[15]]);
    let nominal_bitrate = u32::from_le_bytes([id_data[20], id_data[21], id_data[22], id_data[23]]);

    // --- Second page: header and segment table. ---
    if first_page_end + 27 > data.len() { return Ok(false); }
    if &data[first_page_end..first_page_end + 4] != b"OggS" { return Ok(false); }
    let seg2_count = data[first_page_end + 26] as usize;
    let seg2_table_start = first_page_end + 27;
    let seg2_table_end = seg2_table_start + seg2_count;
    if seg2_table_end > data.len() { return Ok(false); }
    let seg2_table = &data[seg2_table_start..seg2_table_end];

    // A segment shorter than 255 bytes ends the packet; if none is found the
    // packet continues on later pages.
    let mut first_packet_size = 0usize;
    let mut single_page = false;
    for &seg in seg2_table {
        first_packet_size += seg as usize;
        if seg < 255 { single_page = true; break; }
    }

    // --- Locate and validate the comment packet before touching `dict`. ---
    let assembled;
    let vc_data: &[u8] = if single_page {
        let comment_start = seg2_table_end;
        if comment_start + first_packet_size > data.len() { return Ok(false); }
        if first_packet_size < 7 { return Ok(false); }
        if &data[comment_start..comment_start + 7] != b"\x03vorbis" { return Ok(false); }
        &data[comment_start + 7..comment_start + first_packet_size]
    } else {
        assembled = match ogg::ogg_assemble_first_packet(data, first_page_end) {
            Some(p) => p,
            None => return Ok(false),
        };
        if assembled.len() < 7 { return Ok(false); }
        if &assembled[0..7] != b"\x03vorbis" { return Ok(false); }
        &assembled[7..]
    };

    // --- Duration and bitrate. ---
    let length = ogg::find_last_granule(data, serial)
        .map(|g| if g > 0 && sample_rate > 0 { g as f64 / sample_rate as f64 } else { 0.0 })
        .unwrap_or(0.0);
    // Prefer the nominal bitrate from the header; otherwise estimate from size.
    let bitrate = if nominal_bitrate > 0 {
        nominal_bitrate
    } else if length > 0.0 {
        (data.len() as f64 * 8.0 / length) as u32
    } else { 0 };

    // --- Everything validated: populate the dict. ---
    let dict_ptr_ogg = dict.as_ptr();
    // SAFETY: `py` proves the interpreter is attached; `dict_ptr_ogg` comes from a
    // live `Bound<PyDict>` and the interned keys outlive the calls.
    unsafe {
        set_dict_f64(dict_ptr_ogg, pyo3::intern!(py, "length").as_ptr(), length);
        set_dict_u32(dict_ptr_ogg, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
        set_dict_u32(dict_ptr_ogg, pyo3::intern!(py, "channels").as_ptr(), channels as u32);
        set_dict_u32(dict_ptr_ogg, pyo3::intern!(py, "bitrate").as_ptr(), bitrate);
    }

    // Key references collected here are consumed by `set_keys_list`.
    let mut keys_out: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
    parse_vc_to_dict_direct(py, vc_data, dict, &mut keys_out)?;
    set_keys_list(py, dict, keys_out)?;

    // SAFETY: as above; `fmt` is NULL-checked before it is used or released.
    unsafe {
        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"ogg\0".as_ptr() as *const std::ffi::c_char);
        if fmt.is_null() {
            return Err(pyo3::PyErr::fetch(py));
        }
        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
        pyo3::ffi::Py_DECREF(fmt);
    }
    Ok(true)
}
/// Reads MP3 stream info and ID3 tags from `data` directly into `dict`.
///
/// The ID3v2 header (if any) is parsed from the first 10 bytes and is only
/// accepted when the whole tag body fits inside `data`. MPEG stream info is
/// parsed from at most 8192 bytes following the tag. ID3v2 frames are walked
/// first; ID3v1 fields from the trailing 128 bytes are only added for keys
/// that are not already present in `dict`.
///
/// Returns `Ok(false)` when `mp3::MPEGInfo::parse` fails and `Ok(true)` once
/// `dict` has been populated (including the `_format` and `_has_tags`
/// entries). Errors from `set_keys_list` are propagated.
#[inline(always)]
fn fast_read_mp3_direct<'py>(py: Python<'py>, data: &[u8], _path: &str, dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    let file_size = data.len() as u64;
    // Accept the ID3v2 header only when the complete tag body is in the buffer.
    let (id3_header, audio_start) = if data.len() >= 10 {
        match id3::header::ID3Header::parse(&data[0..10], 0) {
            Ok(h) => {
                let tag_size = h.size as usize;
                if 10 + tag_size <= data.len() {
                    let astart = h.full_size() as usize;
                    (Some(h), astart)
                } else {
                    (None, 0)
                }
            }
            Err(_) => (None, 0),
        }
    } else {
        (None, 0)
    };
    // Only the first 8 KiB of audio is handed to the MPEG parser.
    let audio_end = data.len().min(audio_start + 8192);
    let audio_data = if audio_start < data.len() { &data[audio_start..audio_end] } else { &[] };
    let info = match mp3::MPEGInfo::parse(audio_data, 0, file_size.saturating_sub(audio_start as u64)) {
        Ok(i) => i,
        Err(_) => return Ok(false),
    };
    let dict_ptr = dict.as_ptr();
    // SAFETY: `dict_ptr` is borrowed from `dict`, which outlives this call.
    unsafe {
        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), info.length);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), info.sample_rate);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), info.channels);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), info.bitrate);
        set_dict_f64(dict_ptr, pyo3::intern!(py, "version").as_ptr(), info.version);
        set_dict_i64(dict_ptr, pyo3::intern!(py, "layer").as_ptr(), info.layer as i64);
        set_dict_i64(dict_ptr, pyo3::intern!(py, "mode").as_ptr(), info.mode as i64);
        set_dict_bool(dict_ptr, pyo3::intern!(py, "protected").as_ptr(), info.protected);
        set_dict_i64(dict_ptr, pyo3::intern!(py, "bitrate_mode").as_ptr(), match info.bitrate_mode {
            mp3::xing::BitrateMode::Unknown => 0,
            mp3::xing::BitrateMode::CBR => 1,
            mp3::xing::BitrateMode::VBR => 2,
            mp3::xing::BitrateMode::ABR => 3,
        });
    }
    let mut key_ptrs: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
    if let Some(ref h) = id3_header {
        let tag_size = h.size as usize;
        let version = h.version.0;
        // Whole-tag unsynchronisation is undone up front for versions below 2.4.
        let decoded_buf;
        let tag_bytes: &[u8] = if h.flags.unsynchronisation && version < 4 {
            decoded_buf = id3::unsynch::decode(&data[10..10 + tag_size]).unwrap_or_default();
            &decoded_buf[..]
        } else {
            &data[10..10 + tag_size]
        };
        let mut offset = 0usize;
        if h.flags.extended && version >= 3 && tag_bytes.len() >= 4 {
            let ext_size = if version == 4 {
                id3::header::BitPaddedInt::syncsafe(&tag_bytes[0..4]) as usize
            } else {
                u32::from_be_bytes([tag_bytes[0], tag_bytes[1], tag_bytes[2], tag_bytes[3]]) as usize
            };
            // v2.4 sizes include the size field itself; v2.3 sizes do not.
            offset = if version == 4 { ext_size } else { ext_size.saturating_add(4) };
        }
        // The extended-header size comes straight from the file. Clamp it so a
        // bogus value cannot push the slice below past the end of the tag.
        offset = offset.min(tag_bytes.len());
        let bpi = if version == 4 {
            id3::header::determine_bpi(&tag_bytes[offset..], tag_bytes.len())
        } else {
            8
        };
        if version == 2 {
            fast_walk_v22_frames(py, tag_bytes, &mut offset, dict_ptr, &mut key_ptrs);
        } else {
            fast_walk_v2x_frames(py, tag_bytes, &mut offset, version, bpi, dict_ptr, &mut key_ptrs);
        }
    }
    // ID3v1 lives in the last 128 bytes and only fills keys ID3v2 did not set.
    if data.len() >= 128 {
        let v1_data = &data[data.len() - 128..];
        if v1_data.len() >= 3 && &v1_data[0..3] == b"TAG" {
            if let Ok(v1_frames) = id3::id3v1::parse_id3v1(v1_data) {
                for frame in v1_frames {
                    let key = frame.hash_key();
                    let key_str = key.as_str();
                    unsafe {
                        let key_ptr = intern_tag_key(key_str.as_bytes());
                        if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
                            let py_val = frame_to_py(py, &frame);
                            pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val.as_ptr());
                            // The key reference is handed over to `key_ptrs`.
                            key_ptrs.push(key_ptr);
                        } else {
                            pyo3::ffi::Py_DECREF(key_ptr);
                        }
                    }
                }
            }
        }
    }
    set_keys_list(py, dict, key_ptrs)?;
    unsafe {
        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"mp3\0".as_ptr() as *const std::ffi::c_char);
        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
        pyo3::ffi::Py_DECREF(fmt);
        // `_has_tags` reflects only whether an ID3v2 header was accepted above.
        let has_tags = if id3_header.is_some() { pyo3::ffi::Py_True() } else { pyo3::ffi::Py_False() };
        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_has_tags").as_ptr(), has_tags);
    }
    Ok(true)
}
/// Reads MP4 stream info and iTunes-style tags from `data` directly into `dict`.
///
/// Duration and timescale come from `moov/mvhd`. Channel count, sample size,
/// sample rate and codec FourCC come from the `stsd` entry of the first track
/// whose `hdlr` declares `soun`; when none is found the defaults are
/// 2 channels, 44100 Hz, 16 bits and `mp4a`. Bitrate is taken from `esds` when
/// present, otherwise estimated from the buffer size and duration. Tags are
/// read from `moov/udta/meta/ilst`, using the first `data` atom of at least
/// 8 bytes in each item.
///
/// Returns `Ok(false)` when there is no `moov` atom and `Ok(true)` once `dict`
/// has been populated. Errors from `set_keys_list` are propagated.
///
/// Keys or values that cannot be converted to Python objects are skipped, and
/// any Python exception raised by the failed conversion is cleared so it
/// cannot leak into the caller.
#[inline(always)]
fn fast_read_mp4_direct<'py>(py: Python<'py>, data: &[u8], _path: &str, dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    use mp4::atom::AtomIter;
    let moov = match AtomIter::new(data, 0, data.len()).find_name(b"moov") {
        Some(a) => a,
        None => return Ok(false),
    };
    let moov_s = moov.data_offset;
    let moov_e = moov_s + moov.data_size;
    let mut duration = 0u64;
    let mut timescale = 1000u32;
    if let Some(mvhd) = AtomIter::new(data, moov_s, moov_e).find_name(b"mvhd") {
        let d = &data[mvhd.data_offset..mvhd.data_offset + mvhd.data_size.min(32)];
        if !d.is_empty() {
            // Version 0 stores 32-bit fields, version 1 stores a 64-bit duration.
            let version = d[0];
            if version == 0 && d.len() >= 20 {
                timescale = u32::from_be_bytes([d[12], d[13], d[14], d[15]]);
                duration = u32::from_be_bytes([d[16], d[17], d[18], d[19]]) as u64;
            } else if version == 1 && d.len() >= 32 {
                timescale = u32::from_be_bytes([d[20], d[21], d[22], d[23]]);
                duration = u64::from_be_bytes([d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]]);
            }
        }
    }
    let length = if timescale > 0 { duration as f64 / timescale as f64 } else { 0.0 };
    let mut channels = 2u32;
    let mut sample_rate = 44100u32;
    let mut bits_per_sample = 16u32;
    let mut codec_bytes: [u8; 4] = *b"mp4a";
    let mut esds_bitrate = 0u32;
    'trak_loop: for trak in AtomIter::new(data, moov_s, moov_e) {
        if trak.name != *b"trak" { continue; }
        let trak_s = trak.data_offset;
        let trak_e = trak_s + trak.data_size;
        let mdia = match AtomIter::new(data, trak_s, trak_e).find_name(b"mdia") {
            Some(a) => a,
            None => continue,
        };
        let mdia_s = mdia.data_offset;
        let mdia_e = mdia_s + mdia.data_size;
        // A track is audio when one of its `hdlr` atoms carries the `soun` type.
        let is_audio = AtomIter::new(data, mdia_s, mdia_e).any(|a| {
            if a.name == *b"hdlr" {
                let d = &data[a.data_offset..a.data_offset + a.data_size.min(12)];
                d.len() >= 12 && &d[8..12] == b"soun"
            } else {
                false
            }
        });
        if !is_audio { continue; }
        let minf = match AtomIter::new(data, mdia_s, mdia_e).find_name(b"minf") {
            Some(a) => a,
            None => continue,
        };
        let stbl = match AtomIter::new(data, minf.data_offset, minf.data_offset + minf.data_size).find_name(b"stbl") {
            Some(a) => a,
            None => continue,
        };
        let stsd = match AtomIter::new(data, stbl.data_offset, stbl.data_offset + stbl.data_size).find_name(b"stsd") {
            Some(a) => a,
            None => continue,
        };
        let stsd_data = &data[stsd.data_offset..stsd.data_offset + stsd.data_size];
        if stsd_data.len() >= 16 {
            // Skip the 8 bytes preceding the first sample entry.
            let entry_data = &stsd_data[8..];
            if entry_data.len() >= 36 {
                let entry_size = u32::from_be_bytes([entry_data[0], entry_data[1], entry_data[2], entry_data[3]]) as usize;
                codec_bytes.copy_from_slice(&entry_data[4..8]);
                let audio_entry = &entry_data[8..];
                if audio_entry.len() >= 20 {
                    channels = u16::from_be_bytes([audio_entry[16], audio_entry[17]]) as u32;
                    bits_per_sample = u16::from_be_bytes([audio_entry[18], audio_entry[19]]) as u32;
                    if audio_entry.len() >= 28 {
                        sample_rate = u16::from_be_bytes([audio_entry[24], audio_entry[25]]) as u32;
                    }
                }
                // Child atoms (such as `esds`) follow the fixed part of the entry.
                if entry_size > 36 && audio_entry.len() >= entry_size - 8 {
                    let sub_start = stsd.data_offset + 8 + 8 + 28;
                    let sub_end = stsd.data_offset + 8 + entry_size;
                    if sub_end <= data.len() {
                        for sub in AtomIter::new(data, sub_start, sub_end) {
                            if sub.name == *b"esds" {
                                let esds = &data[sub.data_offset..sub.data_offset + sub.data_size];
                                esds_bitrate = mp4::parse_esds_bitrate(esds);
                                break;
                            }
                        }
                    }
                }
            }
        }
        // Only the first audio track is inspected.
        break 'trak_loop;
    }
    let bitrate = if esds_bitrate > 0 {
        esds_bitrate
    } else if length > 0.0 {
        (data.len() as f64 * 8.0 / length) as u32
    } else {
        0
    };
    let dict_ptr = dict.as_ptr();
    // SAFETY: `dict_ptr` is borrowed from `dict`, which outlives this call.
    unsafe {
        set_dict_f64(dict_ptr, pyo3::intern!(py, "length").as_ptr(), length);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "channels").as_ptr(), channels);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "bitrate").as_ptr(), bitrate);
        set_dict_u32(dict_ptr, pyo3::intern!(py, "bits_per_sample").as_ptr(), bits_per_sample);
        // The FourCC comes from the file and need not be valid UTF-8. Decode as
        // UTF-8 first (unchanged result for well-formed input), then fall back
        // to Latin-1, which accepts every byte value.
        let mut codec_ptr = pyo3::ffi::PyUnicode_FromStringAndSize(
            codec_bytes.as_ptr() as *const std::ffi::c_char, 4);
        if codec_ptr.is_null() {
            pyo3::ffi::PyErr_Clear();
            codec_ptr = pyo3::ffi::PyUnicode_DecodeLatin1(
                codec_bytes.as_ptr() as *const std::ffi::c_char, 4, std::ptr::null());
        }
        if codec_ptr.is_null() {
            // Never pass NULL to PyDict_SetItem / Py_DECREF.
            pyo3::ffi::PyErr_Clear();
        } else {
            pyo3::ffi::PyDict_SetItem(dict_ptr, pyo3::intern!(py, "codec").as_ptr(), codec_ptr);
            pyo3::ffi::Py_DECREF(codec_ptr);
        }
    }
    let mut key_ptrs: Vec<*mut pyo3::ffi::PyObject> = Vec::with_capacity(16);
    if let Some(udta) = AtomIter::new(data, moov_s, moov_e).find_name(b"udta") {
        if let Some(meta) = AtomIter::new(data, udta.data_offset, udta.data_offset + udta.data_size).find_name(b"meta") {
            // Skip the 4 bytes at the start of the `meta` payload before its children.
            let meta_off = meta.data_offset + 4;
            let meta_end = meta.data_offset + meta.data_size;
            if meta_off < meta_end {
                if let Some(ilst) = AtomIter::new(data, meta_off, meta_end).find_name(b"ilst") {
                    for item in AtomIter::new(data, ilst.data_offset, ilst.data_offset + ilst.data_size) {
                        let key_ptr = if item.name == *b"----" {
                            let freeform_key = mp4::build_freeform_key(data, item.data_offset, item.data_offset + item.data_size);
                            unsafe {
                                let bytes = freeform_key.as_bytes();
                                pyo3::ffi::PyUnicode_FromStringAndSize(
                                    bytes.as_ptr() as *const std::ffi::c_char,
                                    bytes.len() as pyo3::ffi::Py_ssize_t)
                            }
                        } else {
                            unsafe { mp4_atom_name_to_py_key(&item.name) }
                        };
                        if key_ptr.is_null() {
                            // Key construction failed (e.g. undecodable atom name):
                            // drop the pending exception and skip the item.
                            unsafe { pyo3::ffi::PyErr_Clear(); }
                            continue;
                        }
                        // True while this function still owns the reference to `key_ptr`.
                        let mut key_owned = true;
                        for da in AtomIter::new(data, item.data_offset, item.data_offset + item.data_size) {
                            if da.name != *b"data" { continue; }
                            let ad = &data[da.data_offset..da.data_offset + da.data_size];
                            if ad.len() < 8 { continue; }
                            let type_ind = u32::from_be_bytes([ad[0], ad[1], ad[2], ad[3]]);
                            let vd = &ad[8..];
                            let py_val = unsafe { mp4_data_to_py_raw(py, &item.name, type_ind, vd) };
                            if py_val.is_null() {
                                // Unsupported or undecodable value; make sure no
                                // exception stays pending.
                                unsafe { pyo3::ffi::PyErr_Clear(); }
                            } else {
                                unsafe {
                                    if pyo3::ffi::PyDict_Contains(dict_ptr, key_ptr) == 0 {
                                        pyo3::ffi::PyDict_SetItem(dict_ptr, key_ptr, py_val);
                                        // The key reference is handed over to `key_ptrs`.
                                        key_ptrs.push(key_ptr);
                                        key_owned = false;
                                    }
                                    pyo3::ffi::Py_DECREF(py_val);
                                }
                            }
                            // Only the first usable `data` atom of an item is read.
                            break;
                        }
                        // Release the key on every path that did not hand it over,
                        // including items without any usable `data` atom.
                        if key_owned {
                            unsafe { pyo3::ffi::Py_DECREF(key_ptr); }
                        }
                    }
                }
            }
        }
    }
    set_keys_list(py, dict, key_ptrs)?;
    unsafe {
        let fmt = pyo3::ffi::PyUnicode_InternFromString(b"mp4\0".as_ptr() as *const std::ffi::c_char);
        pyo3::ffi::PyDict_SetItem(dict.as_ptr(), pyo3::intern!(py, "_format").as_ptr(), fmt);
        pyo3::ffi::Py_DECREF(fmt);
    }
    Ok(true)
}
/// Builds a Python `str` key from a 4-byte MP4 atom name.
///
/// A leading `0xA9` byte is rewritten as the two-byte UTF-8 sequence
/// `0xC2 0xA9` and followed by the remaining three name bytes; any other name
/// is passed to Python unchanged as 4 bytes of UTF-8.
///
/// Returns whatever `PyUnicode_FromStringAndSize` returns, which may be NULL.
///
/// # Safety
///
/// Calls the CPython C API directly.
/// NOTE(review): presumably requires the GIL to be held by the caller — confirm.
#[inline(always)]
unsafe fn mp4_atom_name_to_py_key(name: &[u8; 4]) -> *mut pyo3::ffi::PyObject {
    match *name {
        [0xa9, second, third, fourth] => {
            let utf8: [u8; 5] = [0xc2, 0xa9, second, third, fourth];
            pyo3::ffi::PyUnicode_FromStringAndSize(utf8.as_ptr() as *const std::ffi::c_char, 5)
        }
        _ => pyo3::ffi::PyUnicode_FromStringAndSize(name.as_ptr() as *const std::ffi::c_char, 4),
    }
}
/// Converts the payload of an MP4 `data` atom into a new Python object.
///
/// `type_ind` selects the conversion:
/// * `1`  — `vd` is decoded as UTF-8 into a `str`.
/// * `21` — `vd` is a big-endian signed integer of 1, 2, 4 or 8 bytes.
/// * `0`  — `trkn`/`disk` with at least 6 bytes become a `(number, total)`
///   tuple of unsigned 16-bit values; `gnre` with at least 2 bytes becomes the
///   genre name at 1-based index in `crate::id3::specs::GENRES`; everything
///   else becomes `bytes`.
/// * `13` / `14` — `vd` is returned as `bytes`.
///
/// Returns a new reference, or NULL when the type or length is unsupported,
/// the genre index is out of range, or a CPython call failed (in which case a
/// Python exception may be pending).
///
/// # Safety
///
/// Calls the CPython C API directly.
/// NOTE(review): presumably requires the GIL to be held by the caller — confirm.
#[inline(always)]
unsafe fn mp4_data_to_py_raw(_py: Python<'_>, atom_name: &[u8; 4], type_ind: u32, vd: &[u8]) -> *mut pyo3::ffi::PyObject {
    match type_ind {
        1 => {
            pyo3::ffi::PyUnicode_FromStringAndSize(
                vd.as_ptr() as *const std::ffi::c_char, vd.len() as pyo3::ffi::Py_ssize_t)
        }
        21 => {
            let val: i64 = match vd.len() {
                1 => vd[0] as i8 as i64,
                2 => i16::from_be_bytes([vd[0], vd[1]]) as i64,
                4 => i32::from_be_bytes([vd[0], vd[1], vd[2], vd[3]]) as i64,
                8 => i64::from_be_bytes([vd[0], vd[1], vd[2], vd[3], vd[4], vd[5], vd[6], vd[7]]),
                _ => return std::ptr::null_mut(),
            };
            pyo3::ffi::PyLong_FromLongLong(val)
        }
        0 => {
            if (atom_name == b"trkn" || atom_name == b"disk") && vd.len() >= 6 {
                // Track/disc number and total are unsigned 16-bit fields; a
                // signed read would turn values >= 32768 negative.
                let a = u16::from_be_bytes([vd[2], vd[3]]) as i64;
                let b = u16::from_be_bytes([vd[4], vd[5]]) as i64;
                let pa = pyo3::ffi::PyLong_FromLongLong(a);
                let pb = pyo3::ffi::PyLong_FromLongLong(b);
                let tup = pyo3::ffi::PyTuple_New(2);
                if pa.is_null() || pb.is_null() || tup.is_null() {
                    // An allocation failed: release whatever was created
                    // instead of storing into (or storing) a NULL pointer.
                    pyo3::ffi::Py_XDECREF(pa);
                    pyo3::ffi::Py_XDECREF(pb);
                    pyo3::ffi::Py_XDECREF(tup);
                    return std::ptr::null_mut();
                }
                // PyTuple_SET_ITEM steals the references to `pa` and `pb`.
                pyo3::ffi::PyTuple_SET_ITEM(tup, 0, pa);
                pyo3::ffi::PyTuple_SET_ITEM(tup, 1, pb);
                tup
            } else if atom_name == b"gnre" && vd.len() >= 2 {
                let genre_id = u16::from_be_bytes([vd[0], vd[1]]) as usize;
                // Genre ids are 1-based indices into the genre table.
                if genre_id > 0 && genre_id <= crate::id3::specs::GENRES.len() {
                    let g = crate::id3::specs::GENRES[genre_id - 1];
                    pyo3::ffi::PyUnicode_FromStringAndSize(
                        g.as_ptr() as *const std::ffi::c_char, g.len() as pyo3::ffi::Py_ssize_t)
                } else {
                    std::ptr::null_mut()
                }
            } else {
                pyo3::ffi::PyBytes_FromStringAndSize(
                    vd.as_ptr() as *const std::ffi::c_char, vd.len() as pyo3::ffi::Py_ssize_t)
            }
        }
        13 | 14 => {
            pyo3::ffi::PyBytes_FromStringAndSize(
                vd.as_ptr() as *const std::ffi::c_char, vd.len() as pyo3::ffi::Py_ssize_t)
        }
        _ => std::ptr::null_mut(),
    }
}
/// Fills `dict` with FLAC stream info only (no tags).
///
/// The `fLaC` marker is expected either at offset 0 or directly after a
/// leading ID3v2 tag. Metadata blocks are then scanned until a block of type 0
/// parses via `flac::StreamInfo::parse`.
///
/// Returns `Ok(true)` after writing `length`, `sample_rate`, `channels`,
/// `bits_per_sample` and `total_samples`; `Ok(false)` when the marker is
/// missing or no parsable block of type 0 is found.
#[inline(always)]
fn fast_info_flac<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    let stream_start = if data.starts_with(b"fLaC") {
        0
    } else if data.len() >= 10 && data.starts_with(b"ID3") {
        // Jump over the ID3v2 tag using its syncsafe size field.
        let tag_len = id3::header::BitPaddedInt::syncsafe(&data[6..10]) as usize;
        let after_tag = 10 + tag_len;
        match data.get(after_tag..after_tag + 4) {
            Some(marker) if marker == b"fLaC" => after_tag,
            _ => return Ok(false),
        }
    } else {
        return Ok(false);
    };

    let mut cursor = stream_start + 4;
    while cursor + 4 <= data.len() {
        // Block header: 1 flag/type byte followed by a 24-bit big-endian length.
        let flags = data[cursor];
        let last_block = flags & 0x80 != 0;
        let block_type = flags & 0x7F;
        let body_len = (usize::from(data[cursor + 1]) << 16)
            | (usize::from(data[cursor + 2]) << 8)
            | usize::from(data[cursor + 3]);
        let body_start = cursor + 4;
        let body_end = body_start + body_len;
        if body_end > data.len() {
            break;
        }
        if block_type == 0 {
            if let Ok(si) = flac::StreamInfo::parse(&data[body_start..body_end]) {
                let out = dict.as_ptr();
                unsafe {
                    set_dict_f64(out, pyo3::intern!(py, "length").as_ptr(), si.length);
                    set_dict_u32(out, pyo3::intern!(py, "sample_rate").as_ptr(), si.sample_rate);
                    set_dict_u32(out, pyo3::intern!(py, "channels").as_ptr(), si.channels as u32);
                    set_dict_u32(out, pyo3::intern!(py, "bits_per_sample").as_ptr(), si.bits_per_sample as u32);
                    set_dict_i64(out, pyo3::intern!(py, "total_samples").as_ptr(), si.total_samples as i64);
                }
                return Ok(true);
            }
        }
        if last_block {
            break;
        }
        cursor = body_end;
    }
    Ok(false)
}
/// Fills `dict` with Ogg Vorbis stream info only (no tags).
///
/// Requires the buffer to start with an `OggS` page whose payload begins with
/// the `\x01vorbis` identification packet. The length is the last granule
/// position reported by `ogg::find_last_granule` divided by the sample rate,
/// or `0.0` when either is unavailable or zero.
///
/// Returns `Ok(true)` after writing `length`, `sample_rate` and `channels`;
/// `Ok(false)` when the buffer is too short or the markers do not match.
#[inline(always)]
fn fast_info_ogg<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    if data.len() < 58 || !data.starts_with(b"OggS") {
        return Ok(false);
    }
    let serial = u32::from_le_bytes([data[14], data[15], data[16], data[17]]);
    // The page header is 27 bytes plus one byte per segment-table entry.
    let payload_start = 27 + usize::from(data[26]);
    if payload_start + 30 > data.len() {
        return Ok(false);
    }
    let ident = &data[payload_start..];
    if !ident.starts_with(b"\x01vorbis") {
        return Ok(false);
    }
    let channels = ident[11];
    let sample_rate = u32::from_le_bytes([ident[12], ident[13], ident[14], ident[15]]);
    let length = ogg::find_last_granule(data, serial).map_or(0.0, |g| {
        if g > 0 && sample_rate > 0 {
            g as f64 / sample_rate as f64
        } else {
            0.0
        }
    });
    let out = dict.as_ptr();
    unsafe {
        set_dict_f64(out, pyo3::intern!(py, "length").as_ptr(), length);
        set_dict_u32(out, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
        set_dict_u32(out, pyo3::intern!(py, "channels").as_ptr(), channels as u32);
    }
    Ok(true)
}
/// Fills `dict` with MP3 stream info only (no tags).
///
/// A leading ID3v2 tag is skipped when its header parses and the tag body fits
/// inside `data`; otherwise parsing starts at offset 0. At most 8192 bytes
/// from that point are handed to `mp3::MPEGInfo::parse`.
///
/// Returns `Ok(true)` after writing `length`, `sample_rate`, `channels` and
/// `bitrate`; `Ok(false)` when the MPEG parser fails.
#[inline(always)]
fn fast_info_mp3<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    let total_len = data.len() as u64;
    let audio_start = match data.get(..10).map(|hdr| id3::header::ID3Header::parse(hdr, 0)) {
        Some(Ok(h)) if 10 + h.size as usize <= data.len() => h.full_size() as usize,
        _ => 0,
    };
    // Limit the parser input to an 8 KiB window after the tag.
    let window_end = data.len().min(audio_start + 8192);
    let window: &[u8] = data.get(audio_start..window_end).unwrap_or(&[]);
    let remaining = total_len.saturating_sub(audio_start as u64);
    let Ok(info) = mp3::MPEGInfo::parse(window, 0, remaining) else {
        return Ok(false);
    };
    let out = dict.as_ptr();
    unsafe {
        set_dict_f64(out, pyo3::intern!(py, "length").as_ptr(), info.length);
        set_dict_u32(out, pyo3::intern!(py, "sample_rate").as_ptr(), info.sample_rate);
        set_dict_u32(out, pyo3::intern!(py, "channels").as_ptr(), info.channels);
        set_dict_u32(out, pyo3::intern!(py, "bitrate").as_ptr(), info.bitrate);
    }
    Ok(true)
}
/// Fills `dict` with MP4 stream info only (no tags).
///
/// Duration and timescale come from `moov/mvhd`. Channel count and sample rate
/// come from the `stsd` entry of the first track whose `hdlr` declares `soun`,
/// defaulting to 2 channels and 44100 Hz.
///
/// Returns `Ok(true)` after writing `length`, `sample_rate` and `channels`;
/// `Ok(false)` when there is no `moov` atom.
#[inline(always)]
fn fast_info_mp4<'py>(py: Python<'py>, data: &[u8], dict: &Bound<'py, PyDict>) -> PyResult<bool> {
    use mp4::atom::AtomIter;
    let Some(moov) = AtomIter::new(data, 0, data.len()).find_name(b"moov") else {
        return Ok(false);
    };
    let moov_start = moov.data_offset;
    let moov_end = moov_start + moov.data_size;

    let (mut duration, mut timescale) = (0u64, 1000u32);
    if let Some(mvhd) = AtomIter::new(data, moov_start, moov_end).find_name(b"mvhd") {
        let body = &data[mvhd.data_offset..mvhd.data_offset + mvhd.data_size.min(32)];
        let be32 = |at: usize| u32::from_be_bytes([body[at], body[at + 1], body[at + 2], body[at + 3]]);
        // Version 0 stores 32-bit fields, version 1 stores a 64-bit duration.
        match body.first().copied() {
            Some(0) if body.len() >= 20 => {
                timescale = be32(12);
                duration = u64::from(be32(16));
            }
            Some(1) if body.len() >= 32 => {
                timescale = be32(20);
                duration = (u64::from(be32(24)) << 32) | u64::from(be32(28));
            }
            _ => {}
        }
    }
    let length = if timescale > 0 { duration as f64 / timescale as f64 } else { 0.0 };

    let mut channels = 2u32;
    let mut sample_rate = 44100u32;
    for trak in AtomIter::new(data, moov_start, moov_end) {
        if trak.name != *b"trak" {
            continue;
        }
        let trak_start = trak.data_offset;
        let trak_end = trak_start + trak.data_size;
        let Some(mdia) = AtomIter::new(data, trak_start, trak_end).find_name(b"mdia") else {
            continue;
        };
        let mdia_start = mdia.data_offset;
        let mdia_end = mdia_start + mdia.data_size;
        // A track is audio when one of its `hdlr` atoms carries the `soun` type.
        let has_sound_handler = AtomIter::new(data, mdia_start, mdia_end).any(|atom| {
            if atom.name != *b"hdlr" {
                return false;
            }
            let head = &data[atom.data_offset..atom.data_offset + atom.data_size.min(12)];
            head.len() >= 12 && &head[8..12] == b"soun"
        });
        if !has_sound_handler {
            continue;
        }
        let Some(minf) = AtomIter::new(data, mdia_start, mdia_end).find_name(b"minf") else {
            continue;
        };
        let Some(stbl) = AtomIter::new(data, minf.data_offset, minf.data_offset + minf.data_size).find_name(b"stbl") else {
            continue;
        };
        let Some(stsd) = AtomIter::new(data, stbl.data_offset, stbl.data_offset + stbl.data_size).find_name(b"stsd") else {
            continue;
        };
        let stsd_body = &data[stsd.data_offset..stsd.data_offset + stsd.data_size];
        // The audio fields start 16 bytes into the payload (8 bytes before the
        // first entry plus the entry's 8-byte size/format prefix) and are only
        // read when at least 28 bytes follow.
        if let Some(sample_entry) = stsd_body.get(16..).filter(|rest| rest.len() >= 28) {
            channels = u32::from(u16::from_be_bytes([sample_entry[16], sample_entry[17]]));
            sample_rate = u32::from(u16::from_be_bytes([sample_entry[24], sample_entry[25]]));
        }
        // Only the first audio track is inspected.
        break;
    }

    let out = dict.as_ptr();
    unsafe {
        set_dict_f64(out, pyo3::intern!(py, "length").as_ptr(), length);
        set_dict_u32(out, pyo3::intern!(py, "sample_rate").as_ptr(), sample_rate);
        set_dict_u32(out, pyo3::intern!(py, "channels").as_ptr(), channels);
    }
    Ok(true)
}
/// Reads `filename` and returns a new dict holding stream info only.
///
/// The parser is chosen purely from the text after the last `.` in `filename`
/// (case-insensitive): `flac`, `ogg`, `mp3`, or one of `m4a`/`m4b`/`mp4`/`m4v`.
///
/// # Errors
///
/// * `PyIOError` when the file cannot be read.
/// * `PyMemoryError` when the dict cannot be allocated.
/// * `PyValueError` when the extension is unknown or the parser rejects the data.
#[pyfunction]
fn _fast_info(py: Python<'_>, filename: &str) -> PyResult<Py<PyAny>> {
    let data = match fast_file_read(filename) {
        Ok(bytes) => bytes,
        Err(e) => return Err(PyIOError::new_err(e.to_string())),
    };
    let raw_dict = unsafe { pyo3::ffi::PyDict_New() };
    if raw_dict.is_null() {
        return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
    }
    let dict: Bound<'_, PyDict> = unsafe { Bound::from_owned_ptr(py, raw_dict).cast_into_unchecked() };

    let ext = filename.rsplit('.').next().unwrap_or("");
    let ext_is = |candidate: &str| ext.eq_ignore_ascii_case(candidate);
    let parsed = if ext_is("flac") {
        fast_info_flac(py, &data, &dict)?
    } else if ext_is("ogg") {
        fast_info_ogg(py, &data, &dict)?
    } else if ext_is("mp3") {
        fast_info_mp3(py, &data, &dict)?
    } else if ["m4a", "m4b", "mp4", "m4v"].iter().any(|&candidate| ext_is(candidate)) {
        fast_info_mp4(py, &data, &dict)?
    } else {
        false
    };

    if parsed {
        Ok(dict.into_any().unbind())
    } else {
        Err(PyValueError::new_err(format!("Unable to parse: {}", filename)))
    }
}
/// Reads `filename` and returns a new dict with stream info and tags.
///
/// Lookup order:
/// 1. the result cache — a shallow copy of the cached dict is returned;
/// 2. the template cache — the template is re-registered in the result cache
///    and a shallow copy of it is returned;
/// 3. the file itself — parsed by extension (`flac`, `ogg`, `mp3`,
///    `m4a`/`m4b`/`mp4`/`m4v`), or by the highest format score for any other
///    extension, then stored in both caches.
///
/// The dict handed back to Python is always a fresh `PyDict_Copy`, never the
/// object held by the caches, so callers mutating their result cannot alter
/// what later calls receive. The copy is shallow: values are still shared.
///
/// # Errors
///
/// * `PyIOError` when the file cannot be read.
/// * `PyMemoryError` when the dict cannot be allocated.
/// * `PyValueError` when no parser accepts the data.
/// * The pending Python exception when `PyDict_Copy` fails.
#[pyfunction]
fn _fast_read(py: Python<'_>, filename: &str) -> PyResult<Py<PyAny>> {
    {
        let rcache = get_result_cache();
        let guard = rcache.read().unwrap();
        if let Some(cached) = guard.get(filename) {
            let copy = unsafe { pyo3::ffi::PyDict_Copy(cached.as_ptr()) };
            if copy.is_null() {
                // Surface the failure instead of continuing with an exception pending.
                return Err(pyo3::PyErr::fetch(py));
            }
            return Ok(unsafe { Bound::from_owned_ptr(py, copy).unbind() });
        }
    }
    {
        // Take our own reference so the template lock is released before the
        // result cache is locked for writing.
        let tcache = get_template_cache();
        let template = tcache.read().unwrap().get(filename).map(|t| t.clone_ref(py));
        if let Some(template) = template {
            let copy = unsafe { pyo3::ffi::PyDict_Copy(template.as_ptr()) };
            if copy.is_null() {
                return Err(pyo3::PyErr::fetch(py));
            }
            // The caches keep the template object; the caller gets the copy.
            get_result_cache().write().unwrap().insert(filename.to_string(), template);
            return Ok(unsafe { Bound::from_owned_ptr(py, copy).unbind() });
        }
    }
    let data = fast_file_read(filename)
        .map_err(|e| PyIOError::new_err(e.to_string()))?;
    let dict: Bound<'_, PyDict> = unsafe {
        let ptr = pyo3::ffi::PyDict_New();
        if ptr.is_null() {
            return Err(pyo3::exceptions::PyMemoryError::new_err("dict alloc failed"));
        }
        Bound::from_owned_ptr(py, ptr).cast_into_unchecked()
    };
    let ext = filename.rsplit('.').next().unwrap_or("");
    let ok = if ext.eq_ignore_ascii_case("flac") {
        fast_read_flac_direct(py, &data, data.len(), &dict)?
    } else if ext.eq_ignore_ascii_case("ogg") {
        fast_read_ogg_direct(py, &data, &dict)?
    } else if ext.eq_ignore_ascii_case("mp3") {
        fast_read_mp3_direct(py, &data, filename, &dict)?
    } else if ext.eq_ignore_ascii_case("m4a") || ext.eq_ignore_ascii_case("m4b")
        || ext.eq_ignore_ascii_case("mp4") || ext.eq_ignore_ascii_case("m4v") {
        fast_read_mp4_direct(py, &data, filename, &dict)?
    } else {
        // Unknown extension: let every format score the content and use the
        // best one. Ties resolve in the order FLAC, Ogg, MP4, MP3.
        let mp3_score = mp3::MP3File::score(filename, &data);
        let flac_score = flac::FLACFile::score(filename, &data);
        let ogg_score = ogg::OggVorbisFile::score(filename, &data);
        let mp4_score = mp4::MP4File::score(filename, &data);
        let max_score = mp3_score.max(flac_score).max(ogg_score).max(mp4_score);
        if max_score == 0 { false }
        else if max_score == flac_score { fast_read_flac_direct(py, &data, data.len(), &dict)? }
        else if max_score == ogg_score { fast_read_ogg_direct(py, &data, &dict)? }
        else if max_score == mp4_score { fast_read_mp4_direct(py, &data, filename, &dict)? }
        else { fast_read_mp3_direct(py, &data, filename, &dict)? }
    };
    if !ok {
        return Err(PyValueError::new_err(format!("Unable to parse: {}", filename)));
    }
    // Copy before caching: the parsed dict stays private to the caches and
    // the caller receives an independent object.
    let returned = unsafe { pyo3::ffi::PyDict_Copy(dict.as_ptr()) };
    if returned.is_null() {
        return Err(pyo3::PyErr::fetch(py));
    }
    let returned = unsafe { Bound::from_owned_ptr(py, returned) };
    let key = filename.to_string();
    get_template_cache().write().unwrap().insert(key.clone(), dict.clone().unbind());
    get_result_cache().write().unwrap().insert(key, dict.unbind());
    Ok(returned.unbind())
}
/// Reads every file in `filenames` in order and returns a list of result dicts.
///
/// File contents come from `read_cached`. Files that cannot be read, whose
/// dict cannot be allocated, or that fail to parse are silently left out, so
/// the returned list may be shorter than `filenames`. Known extensions use the
/// direct parsers; anything else goes through `parse_and_serialize` and
/// `preserialized_to_flat_dict`.
///
/// # Errors
///
/// `PyMemoryError` when the result list cannot be allocated.
#[pyfunction]
fn _fast_read_seq(py: Python<'_>, filenames: Vec<String>) -> PyResult<Py<PyAny>> {
    unsafe {
        let list_ptr = pyo3::ffi::PyList_New(0);
        if list_ptr.is_null() {
            return Err(pyo3::exceptions::PyMemoryError::new_err("failed to create list"));
        }
        for filename in &filenames {
            let Ok(data) = read_cached(filename) else {
                continue;
            };
            let raw_dict = pyo3::ffi::PyDict_New();
            if raw_dict.is_null() {
                continue;
            }
            let dict: Bound<'_, PyDict> = Bound::from_owned_ptr(py, raw_dict).cast_into_unchecked();
            let ext = filename.rsplit('.').next().unwrap_or("");
            let ext_is = |candidate: &str| ext.eq_ignore_ascii_case(candidate);
            // Parser errors are treated the same as "not parsable".
            let parsed = if ext_is("flac") {
                fast_read_flac_direct(py, &data, data.len(), &dict).unwrap_or(false)
            } else if ext_is("ogg") {
                fast_read_ogg_direct(py, &data, &dict).unwrap_or(false)
            } else if ext_is("mp3") {
                fast_read_mp3_direct(py, &data, filename, &dict).unwrap_or(false)
            } else if ["m4a", "m4b", "mp4", "m4v"].iter().any(|&candidate| ext_is(candidate)) {
                fast_read_mp4_direct(py, &data, filename, &dict).unwrap_or(false)
            } else {
                match parse_and_serialize(&data, filename) {
                    Some(pf) => {
                        preserialized_to_flat_dict(py, &pf, &dict).unwrap_or(());
                        true
                    }
                    None => false,
                }
            };
            if parsed {
                pyo3::ffi::PyList_Append(list_ptr, dict.as_ptr());
            }
        }
        Ok(Bound::from_owned_ptr(py, list_ptr).unbind())
    }
}
#[pymodule]
fn mutagen_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyMP3>()?;
m.add_class::<PyMPEGInfo>()?;
m.add_class::<PyID3>()?;
m.add_class::<PyFLAC>()?;
m.add_class::<PyStreamInfo>()?;
m.add_class::<PyVComment>()?;
m.add_class::<PyOggVorbis>()?;
m.add_class::<PyOggVorbisInfo>()?;
m.add_class::<PyMP4>()?;
m.add_class::<PyMP4Info>()?;
m.add_class::<PyMP4Tags>()?;
m.add_class::<PyBatchResult>()?;
m.add_class::<PyPOPM>()?;
m.add_function(wrap_pyfunction!(file_open, m)?)?;
m.add_function(wrap_pyfunction!(batch_open, m)?)?;
m.add_function(wrap_pyfunction!(batch_diag, m)?)?;
m.add_function(wrap_pyfunction!(clear_cache, m)?)?;
m.add_function(wrap_pyfunction!(clear_all_caches, m)?)?;
m.add_function(wrap_pyfunction!(_rust_batch_open, m)?)?;
m.add_function(wrap_pyfunction!(_fast_read, m)?)?;
m.add_function(wrap_pyfunction!(_fast_info, m)?)?;
m.add_function(wrap_pyfunction!(_fast_read_seq, m)?)?;
m.add_function(wrap_pyfunction!(_fast_batch_read, m)?)?;
m.add("MutagenError", m.py().get_type::<common::error::MutagenPyError>())?;
m.add("ID3Error", m.py().get_type::<common::error::ID3Error>())?;
m.add("ID3NoHeaderError", m.py().get_type::<common::error::ID3NoHeaderError>())?;
m.add("MP3Error", m.py().get_type::<common::error::MP3Error>())?;
m.add("HeaderNotFoundError", m.py().get_type::<common::error::HeaderNotFoundError>())?;
m.add("FLACError", m.py().get_type::<common::error::FLACError>())?;
m.add("FLACNoHeaderError", m.py().get_type::<common::error::FLACNoHeaderError>())?;
m.add("OggError", m.py().get_type::<common::error::OggError>())?;
m.add("MP4Error", m.py().get_type::<common::error::MP4Error>())?;
m.add("File", wrap_pyfunction!(file_open, m)?)?;
Ok(())
}
}