use std::collections::HashMap;
use std::path::Path;
use voirs_sdk::AudioFormat;
/// Tag data attached to an audio file, plus synthesis-specific extras.
///
/// Every field is optional or an (initially empty) map, so
/// `AudioMetadata::default()` describes a file with no metadata at all.
#[derive(Debug, Clone, Default)]
pub struct AudioMetadata {
/// Track title.
pub title: Option<String>,
/// Performing artist.
pub artist: Option<String>,
/// Album name.
pub album: Option<String>,
/// Track number.
pub track: Option<u32>,
/// Release year.
pub year: Option<u32>,
/// Genre label.
pub genre: Option<String>,
/// Free-form comment.
pub comment: Option<String>,
/// Duration of the audio; `MetadataReader` fills this with seconds
/// (sample count divided by sample rate) for WAV files.
pub duration: Option<f64>,
/// Key/value parameters describing how the audio was synthesized
/// (for example `voice`, `quality`, `rate`, `pitch`, `volume`, `original_text`).
pub synthesis_params: HashMap<String, String>,
/// Embedded picture, if any.
pub album_art: Option<AlbumArt>,
/// Arbitrary additional tags keyed by name.
pub custom_tags: HashMap<String, String>,
/// Name of the voice used for synthesis.
pub voice_name: Option<String>,
/// Description of where the synthesized text came from.
pub text_source: Option<String>,
/// Version of the synthesis model.
pub model_version: Option<String>,
/// Language of the content.
pub language: Option<String>,
/// Date string; written verbatim into date-type tag fields.
pub date: Option<String>,
}
/// An embedded picture together with the information needed to tag it.
#[derive(Debug, Clone)]
pub struct AlbumArt {
/// Raw bytes of the encoded image file.
pub data: Vec<u8>,
/// MIME type of `data`, e.g. `image/jpeg`; selects the header parser
/// used by `get_dimensions`.
pub mime_type: String,
/// Human-readable description of the picture.
pub description: String,
/// Role this picture plays (front cover, artist photo, ...).
pub picture_type: PictureType,
}
/// Role of an embedded picture.
///
/// NOTE(review): the variant names and their order look like the ID3v2 `APIC`
/// picture-type list, but no numeric discriminants are assigned here — confirm
/// before relying on `as u8` matching the on-disk codes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PictureType {
Other,
Icon,
OtherIcon,
CoverFront,
CoverBack,
Leaflet,
Media,
LeadArtist,
Artist,
Conductor,
Band,
Composer,
Lyricist,
RecordingLocation,
DuringRecording,
DuringPerformance,
MovieScreenCapture,
ColouredFish,
Illustration,
BandLogo,
PublisherLogo,
}
/// Builder that accumulates an [`AudioMetadata`] through chained setters and
/// then writes it to an audio file in a format-specific way.
pub struct MetadataWriter {
// The metadata being assembled; every builder method mutates this value.
metadata: AudioMetadata,
}
impl MetadataWriter {
/// Creates a writer whose metadata starts out completely empty.
pub fn new() -> Self {
    let metadata = AudioMetadata::default();
    Self { metadata }
}
pub fn for_synthesis() -> Self {
let mut metadata = AudioMetadata::default();
metadata.artist = Some("VoiRS".to_string());
metadata.comment = Some("Generated by VoiRS Text-to-Speech".to_string());
metadata
.custom_tags
.insert("ENCODER".to_string(), "VoiRS CLI".to_string());
metadata.custom_tags.insert(
"SOFTWARE".to_string(),
env!("CARGO_PKG_VERSION").to_string(),
);
Self { metadata }
}
pub fn title<S: Into<String>>(mut self, title: S) -> Self {
self.metadata.title = Some(title.into());
self
}
pub fn artist<S: Into<String>>(mut self, artist: S) -> Self {
self.metadata.artist = Some(artist.into());
self
}
pub fn album<S: Into<String>>(mut self, album: S) -> Self {
self.metadata.album = Some(album.into());
self
}
pub fn track(mut self, track: u32) -> Self {
self.metadata.track = Some(track);
self
}
pub fn year(mut self, year: u32) -> Self {
self.metadata.year = Some(year);
self
}
pub fn genre<S: Into<String>>(mut self, genre: S) -> Self {
self.metadata.genre = Some(genre.into());
self
}
pub fn comment<S: Into<String>>(mut self, comment: S) -> Self {
self.metadata.comment = Some(comment.into());
self
}
pub fn duration(mut self, duration: f64) -> Self {
self.metadata.duration = Some(duration);
self
}
/// Records one synthesis parameter; an existing entry with the same key is
/// overwritten. Returns the writer for further chaining.
pub fn synthesis_param<S: Into<String>>(mut self, key: S, value: S) -> Self {
    let (name, setting) = (key.into(), value.into());
    let params = &mut self.metadata.synthesis_params;
    params.insert(name, setting);
    self
}
/// Records one custom tag; an existing tag with the same key is overwritten.
/// Returns the writer for further chaining.
pub fn custom_tag<S: Into<String>>(mut self, key: S, value: S) -> Self {
    let (name, text) = (key.into(), value.into());
    let tags = &mut self.metadata.custom_tags;
    tags.insert(name, text);
    self
}
pub fn album_art(mut self, art: AlbumArt) -> Self {
self.metadata.album_art = Some(art);
self
}
/// Fills in the metadata that describes a single synthesis run.
///
/// * The title becomes `text`, shortened to at most 97 bytes plus `...` when
///   `text` is longer than 100 bytes.
/// * `voice`, `quality`, `rate`, `pitch`, `volume` and the full `text`
///   (as `original_text`) are stored in `synthesis_params`.
/// * `SYNTHESIS_ENGINE`, `VOICE_MODEL` and `GENERATION_DATE` (current UTC
///   time) are stored in `custom_tags`.
///
/// Returns the writer for further chaining.
pub fn add_synthesis_metadata(
    mut self,
    text: &str,
    voice: &str,
    quality: &str,
    rate: f32,
    pitch: f32,
    volume: f32,
) -> Self {
    let title = if text.len() > 100 {
        // `text.len()` counts bytes, so byte 97 may fall inside a multi-byte
        // UTF-8 character; slicing there would panic. Back off to the nearest
        // character boundary (index 0 is always a boundary, so this ends).
        let mut cut = 97;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &text[..cut])
    } else {
        text.to_string()
    };
    self.metadata.title = Some(title);

    let params = &mut self.metadata.synthesis_params;
    params.insert("voice".to_string(), voice.to_string());
    params.insert("quality".to_string(), quality.to_string());
    params.insert("rate".to_string(), rate.to_string());
    params.insert("pitch".to_string(), pitch.to_string());
    params.insert("volume".to_string(), volume.to_string());
    params.insert("original_text".to_string(), text.to_string());

    let tags = &mut self.metadata.custom_tags;
    tags.insert("SYNTHESIS_ENGINE".to_string(), "VoiRS".to_string());
    tags.insert("VOICE_MODEL".to_string(), voice.to_string());
    tags.insert(
        "GENERATION_DATE".to_string(),
        chrono::Utc::now()
            .format("%Y-%m-%d %H:%M:%S UTC")
            .to_string(),
    );
    self
}
/// Writes the accumulated metadata to `file_path`, choosing the tagging
/// scheme from `format`: ID3 for MP3, Vorbis comments for FLAC and Ogg,
/// Opus tags for Opus, and a RIFF INFO chunk for WAV.
///
/// # Errors
/// Propagates whatever the format-specific writer returns.
pub fn write_to_file<P: AsRef<Path>>(
    &self,
    file_path: P,
    format: AudioFormat,
) -> Result<(), MetadataError> {
    let target = file_path.as_ref();
    match format {
        AudioFormat::Mp3 => self.write_id3_tags(target),
        AudioFormat::Flac | AudioFormat::Ogg => self.write_vorbis_comments(target),
        AudioFormat::Opus => self.write_opus_tags(target),
        AudioFormat::Wav => self.write_wav_metadata(target),
    }
}
/// Rewrites the MP3 at `file_path` so that it starts with a freshly built
/// ID3v2.4 tag, replacing any ID3v2 tag already at the start of the file.
///
/// # Errors
/// * `MetadataError::IoError` if the file cannot be read or written.
/// * `MetadataError::InvalidFormat` if an existing tag declares a size that
///   extends past the end of the file (previously this panicked).
fn write_id3_tags<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
    let file_path = file_path.as_ref();
    tracing::info!("Writing ID3 tags to MP3 file: {}", file_path.display());
    let file_content = std::fs::read(file_path).map_err(MetadataError::IoError)?;
    let id3_tag = self.create_id3_tag_bytes()?;

    // Offset of the first byte that is NOT part of an existing leading tag.
    let audio_start = if file_content.len() >= 10 && file_content.starts_with(b"ID3") {
        let declared_size = self.parse_id3_tag_size(&file_content[0..10])?;
        // ID3v2.4 may append a 10-byte footer (header flag bit 0x10); the
        // declared size covers neither the header nor the footer.
        let footer_len = if file_content[3] >= 4 && file_content[5] & 0x10 != 0 {
            10
        } else {
            0
        };
        let total_existing_size = 10 + declared_size + footer_len;
        if total_existing_size > file_content.len() {
            return Err(MetadataError::InvalidFormat(format!(
                "Existing ID3 tag declares {} bytes but the file is only {} bytes long",
                total_existing_size,
                file_content.len()
            )));
        }
        total_existing_size
    } else {
        0
    };

    let audio = &file_content[audio_start..];
    let mut new_content = Vec::with_capacity(id3_tag.len() + audio.len());
    new_content.extend_from_slice(&id3_tag);
    new_content.extend_from_slice(audio);
    std::fs::write(file_path, new_content).map_err(MetadataError::IoError)?;
    tracing::info!("Successfully wrote ID3 tags to {}", file_path.display());
    Ok(())
}
fn create_id3_tag_bytes(&self) -> Result<Vec<u8>, MetadataError> {
let mut tag_data = Vec::new();
if let Some(title) = &self.metadata.title {
tag_data.extend_from_slice(&self.create_id3_frame("TIT2", title)?);
}
if let Some(artist) = &self.metadata.artist {
tag_data.extend_from_slice(&self.create_id3_frame("TPE1", artist)?);
}
if let Some(album) = &self.metadata.album {
tag_data.extend_from_slice(&self.create_id3_frame("TALB", album)?);
}
if let Some(genre) = &self.metadata.genre {
tag_data.extend_from_slice(&self.create_id3_frame("TCON", genre)?);
}
if let Some(date) = &self.metadata.date {
tag_data.extend_from_slice(&self.create_id3_frame("TDRC", date)?);
}
if let Some(comment) = &self.metadata.comment {
tag_data.extend_from_slice(&self.create_id3_frame("COMM", comment)?);
}
if let Some(voice_name) = &self.metadata.voice_name {
tag_data.extend_from_slice(
&self.create_id3_frame("TXXX", &format!("VoiRS_Voice={}", voice_name))?,
);
}
if let Some(text_source) = &self.metadata.text_source {
tag_data.extend_from_slice(
&self.create_id3_frame("TXXX", &format!("VoiRS_TextSource={}", text_source))?,
);
}
if !self.metadata.synthesis_params.is_empty() {
let params_str = self
.metadata
.synthesis_params
.iter()
.map(|(k, v)| format!("{}={}", k, v))
.collect::<Vec<_>>()
.join(";");
tag_data.extend_from_slice(
&self.create_id3_frame("TXXX", &format!("VoiRS_SynthesisParams={}", params_str))?,
);
}
if let Some(model_version) = &self.metadata.model_version {
tag_data.extend_from_slice(
&self.create_id3_frame("TXXX", &format!("VoiRS_ModelVersion={}", model_version))?,
);
}
if let Some(language) = &self.metadata.language {
tag_data.extend_from_slice(&self.create_id3_frame("TLAN", language)?);
}
let mut header = Vec::new();
header.extend_from_slice(b"ID3"); header.push(0x04); header.push(0x00); header.push(0x00);
let tag_size = tag_data.len() as u32;
header.extend_from_slice(&Self::encode_synchsafe_int(tag_size));
let mut result = Vec::new();
result.extend_from_slice(&header);
result.extend_from_slice(&tag_data);
Ok(result)
}
fn create_id3_frame(&self, frame_id: &str, content: &str) -> Result<Vec<u8>, MetadataError> {
let mut frame = Vec::new();
frame.extend_from_slice(frame_id.as_bytes());
let mut frame_content = Vec::new();
frame_content.push(0x03); frame_content.extend_from_slice(content.as_bytes());
let frame_size = frame_content.len() as u32;
frame.extend_from_slice(&Self::encode_synchsafe_int(frame_size));
frame.push(0x00);
frame.push(0x00);
frame.extend_from_slice(&frame_content);
Ok(frame)
}
/// Extracts the tag size from a 10-byte ID3v2 header by decoding bytes 6..10
/// as a synchsafe integer.
///
/// # Errors
/// `MetadataError::InvalidFormat` if `header` has fewer than 10 bytes.
fn parse_id3_tag_size(&self, header: &[u8]) -> Result<usize, MetadataError> {
    match header.get(6..10) {
        Some(size_bytes) => Self::decode_synchsafe_int(size_bytes).map(|size| size as usize),
        None => Err(MetadataError::InvalidFormat(
            "ID3 header too short".to_string(),
        )),
    }
}
/// Encodes `value` as a 4-byte synchsafe integer: 7 payload bits per byte,
/// most significant group first. Bits above the low 28 are discarded.
fn encode_synchsafe_int(value: u32) -> [u8; 4] {
    [21u32, 14, 7, 0].map(|shift| ((value >> shift) & 0x7F) as u8)
}
/// Decodes the first four bytes of `bytes` as a synchsafe integer
/// (7 bits per byte, most significant group first).
///
/// # Errors
/// `MetadataError::InvalidFormat` if fewer than four bytes are supplied.
fn decode_synchsafe_int(bytes: &[u8]) -> Result<u32, MetadataError> {
    match bytes {
        [b0, b1, b2, b3, ..] => Ok((u32::from(*b0) << 21)
            | (u32::from(*b1) << 14)
            | (u32::from(*b2) << 7)
            | u32::from(*b3)),
        _ => Err(MetadataError::InvalidFormat(
            "Invalid synchsafe integer".to_string(),
        )),
    }
}
/// Writes title/artist/album/comment/genre as `KEY=value` lines to a
/// companion text file next to the audio file (the audio file's extension is
/// replaced by `vorbis_comments.txt`). The audio file itself is not modified.
///
/// # Errors
/// `MetadataError::IoError` if the audio file does not exist or the
/// companion file cannot be written.
fn write_vorbis_comments<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
    let path = file_path.as_ref();
    tracing::info!("Writing Vorbis comments to FLAC/OGG file: {:?}", path);
    if !path.exists() {
        let missing = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("File not found: {:?}", path),
        );
        return Err(MetadataError::IoError(missing));
    }

    let metadata_path = path.with_extension("vorbis_comments.txt");
    let meta = &self.metadata;
    let fields = [
        ("TITLE", &meta.title),
        ("ARTIST", &meta.artist),
        ("ALBUM", &meta.album),
        ("COMMENT", &meta.comment),
        ("GENRE", &meta.genre),
    ];

    let mut content = String::from("# Vorbis Comments for FLAC/OGG file\n");
    content.push_str(&format!("# Original file: {:?}\n\n", path));
    for (key, value) in fields {
        if let Some(value) = value {
            content.push_str(&format!("{}={}\n", key, value));
        }
    }

    std::fs::write(&metadata_path, content).map_err(MetadataError::IoError)?;
    tracing::info!(
        "Vorbis comments written to companion file: {:?}",
        metadata_path
    );
    Ok(())
}
/// Writes title/artist/album/comment/genre as `KEY=value` lines to a
/// companion text file next to the Opus file (the audio file's extension is
/// replaced by `opus_tags.txt`). The audio file itself is not modified.
///
/// # Errors
/// `MetadataError::IoError` if the audio file does not exist or the
/// companion file cannot be written.
fn write_opus_tags<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
    let path = file_path.as_ref();
    tracing::info!("Writing Opus tags to file: {:?}", path);
    if !path.exists() {
        let missing = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("File not found: {:?}", path),
        );
        return Err(MetadataError::IoError(missing));
    }

    let metadata_path = path.with_extension("opus_tags.txt");
    let meta = &self.metadata;
    let tag_lines: String = [
        ("TITLE", &meta.title),
        ("ARTIST", &meta.artist),
        ("ALBUM", &meta.album),
        ("COMMENT", &meta.comment),
        ("GENRE", &meta.genre),
    ]
    .iter()
    .filter_map(|(key, value)| value.as_ref().map(|v| format!("{}={}\n", key, v)))
    .collect();

    let mut content = String::from("# Opus Tags (Vorbis Comment format)\n");
    content.push_str(&format!("# Original file: {:?}\n\n", path));
    content.push_str(&tag_lines);

    std::fs::write(&metadata_path, content).map_err(MetadataError::IoError)?;
    tracing::info!("Opus tags written to companion file: {:?}", metadata_path);
    Ok(())
}
fn write_wav_metadata<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
let path = file_path.as_ref();
tracing::info!("Writing WAV INFO chunk metadata to: {}", path.display());
let mut file_content = std::fs::read(path).map_err(MetadataError::IoError)?;
if file_content.len() < 12 {
return Err(MetadataError::InvalidFormat(
"WAV file too small".to_string(),
));
}
if &file_content[0..4] != b"RIFF" || &file_content[8..12] != b"WAVE" {
return Err(MetadataError::InvalidFormat(
"Not a valid WAV file".to_string(),
));
}
let info_chunk = self.create_wav_info_chunk()?;
let mut offset = 12; let mut fmt_found = false;
let mut data_end = None;
while offset + 8 <= file_content.len() {
let chunk_id = &file_content[offset..offset + 4];
let chunk_size = u32::from_le_bytes([
file_content[offset + 4],
file_content[offset + 5],
file_content[offset + 6],
file_content[offset + 7],
]) as usize;
if chunk_id == b"fmt " {
fmt_found = true;
} else if chunk_id == b"data" && fmt_found {
data_end = Some(offset + 8 + chunk_size);
break;
}
offset += 8 + chunk_size;
if !chunk_size.is_multiple_of(2) {
offset += 1;
}
}
let insert_point = data_end.unwrap_or(file_content.len());
let mut cleaned_content = Vec::new();
let mut scan_offset = 12;
while scan_offset + 8 <= file_content.len() {
let chunk_id = &file_content[scan_offset..scan_offset + 4];
let chunk_size = u32::from_le_bytes([
file_content[scan_offset + 4],
file_content[scan_offset + 5],
file_content[scan_offset + 6],
file_content[scan_offset + 7],
]) as usize;
if chunk_id == b"LIST"
&& scan_offset + 12 <= file_content.len()
&& &file_content[scan_offset + 8..scan_offset + 12] == b"INFO"
{
scan_offset += 8 + chunk_size;
if !chunk_size.is_multiple_of(2) {
scan_offset += 1;
}
continue;
}
let chunk_end = scan_offset + 8 + chunk_size;
if !chunk_size.is_multiple_of(2) && chunk_end < file_content.len() {
cleaned_content.extend_from_slice(&file_content[scan_offset..chunk_end + 1]);
scan_offset = chunk_end + 1;
} else {
cleaned_content.extend_from_slice(&file_content[scan_offset..chunk_end]);
scan_offset = chunk_end;
}
}
if scan_offset < file_content.len() {
cleaned_content.extend_from_slice(&file_content[scan_offset..]);
}
let mut new_content = Vec::new();
let actual_insert = if insert_point <= cleaned_content.len() {
insert_point
} else {
cleaned_content.len()
};
new_content.extend_from_slice(&cleaned_content[..actual_insert]);
new_content.extend_from_slice(&info_chunk);
if actual_insert < cleaned_content.len() {
new_content.extend_from_slice(&cleaned_content[actual_insert..]);
}
let total_size = new_content.len() - 8;
new_content[4..8].copy_from_slice(&(total_size as u32).to_le_bytes());
std::fs::write(path, new_content).map_err(MetadataError::IoError)?;
tracing::info!("Successfully wrote WAV INFO chunk to {}", path.display());
Ok(())
}
fn create_wav_info_chunk(&self) -> Result<Vec<u8>, MetadataError> {
let mut info_data = Vec::new();
if let Some(title) = &self.metadata.title {
info_data.extend_from_slice(&self.create_wav_info_field("INAM", title));
}
if let Some(artist) = &self.metadata.artist {
info_data.extend_from_slice(&self.create_wav_info_field("IART", artist));
}
if let Some(album) = &self.metadata.album {
info_data.extend_from_slice(&self.create_wav_info_field("IPRD", album));
}
if let Some(comment) = &self.metadata.comment {
info_data.extend_from_slice(&self.create_wav_info_field("ICMT", comment));
}
if let Some(genre) = &self.metadata.genre {
info_data.extend_from_slice(&self.create_wav_info_field("IGNR", genre));
}
if let Some(date) = &self.metadata.date {
info_data.extend_from_slice(&self.create_wav_info_field("ICRD", date));
}
if let Some(voice_name) = &self.metadata.voice_name {
info_data.extend_from_slice(
&self.create_wav_info_field("ISFT", &format!("VoiRS Voice: {}", voice_name)),
);
}
if !self.metadata.synthesis_params.is_empty() {
let params_str = self
.metadata
.synthesis_params
.iter()
.map(|(k, v)| format!("{}={}", k, v))
.collect::<Vec<_>>()
.join("; ");
info_data.extend_from_slice(&self.create_wav_info_field("ISRC", ¶ms_str));
}
let mut chunk = Vec::new();
chunk.extend_from_slice(b"LIST"); chunk.extend_from_slice(&((info_data.len() + 4) as u32).to_le_bytes()); chunk.extend_from_slice(b"INFO"); chunk.extend_from_slice(&info_data);
if chunk.len() % 2 != 0 {
chunk.push(0);
}
Ok(chunk)
}
/// Builds one INFO sub-chunk: the id bytes, the little-endian byte length of
/// `value`, the value bytes, and a single zero pad byte when the value has
/// odd length (the pad is not counted in the length field).
fn create_wav_info_field(&self, field_id: &str, value: &str) -> Vec<u8> {
    let payload = value.as_bytes();
    let needs_pad = payload.len() % 2 == 1;

    let mut field =
        Vec::with_capacity(field_id.len() + 4 + payload.len() + usize::from(needs_pad));
    field.extend(field_id.bytes());
    field.extend((payload.len() as u32).to_le_bytes());
    field.extend_from_slice(payload);
    if needs_pad {
        field.push(0);
    }
    field
}
}
/// Stateless namespace for the format-specific metadata readers.
pub struct MetadataReader;
impl MetadataReader {
/// Reads metadata from `file_path`, dispatching on `format` to the matching
/// reader (ID3 for MP3, Vorbis comments for FLAC and Ogg, Opus tags for Opus,
/// WAV header inspection for WAV).
///
/// # Errors
/// Propagates whatever the format-specific reader returns.
pub fn read_from_file<P: AsRef<Path>>(
    file_path: P,
    format: AudioFormat,
) -> Result<AudioMetadata, MetadataError> {
    let source = file_path.as_ref();
    match format {
        AudioFormat::Mp3 => Self::read_id3_tags(source),
        AudioFormat::Flac | AudioFormat::Ogg => Self::read_vorbis_comments(source),
        AudioFormat::Opus => Self::read_opus_tags(source),
        AudioFormat::Wav => Self::read_wav_metadata(source),
    }
}
/// Placeholder MP3 reader: it does not parse ID3 frames. It only records
/// file-system facts (`file_size`, `modified_time` in seconds since the Unix
/// epoch) plus `format` and an explanatory `note` in `custom_tags`.
///
/// # Errors
/// `MetadataError::IoError` (`NotFound`) if the file does not exist.
fn read_id3_tags<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
    let path = file_path.as_ref();
    tracing::debug!("Reading ID3 tags from MP3 file: {:?}", path);
    if !path.exists() {
        let missing = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("File not found: {:?}", path),
        );
        return Err(MetadataError::IoError(missing));
    }

    let mut metadata = AudioMetadata::default();
    let tags = &mut metadata.custom_tags;
    // File-system details are best effort: failures are silently skipped.
    if let Ok(file_metadata) = std::fs::metadata(path) {
        tags.insert("file_size".to_string(), file_metadata.len().to_string());
        let modified_secs = file_metadata
            .modified()
            .ok()
            .and_then(|stamp| stamp.duration_since(std::time::UNIX_EPOCH).ok())
            .map(|elapsed| elapsed.as_secs());
        if let Some(secs) = modified_secs {
            tags.insert("modified_time".to_string(), secs.to_string());
        }
    }
    tags.insert("format".to_string(), "MP3".to_string());
    tags.insert(
        "note".to_string(),
        "ID3 tag reading requires additional dependencies".to_string(),
    );
    Ok(metadata)
}
/// Placeholder FLAC/Ogg reader: it does not parse Vorbis comments. It records
/// `file_size`, a `format` label derived from the file extension, and an
/// explanatory `note` in `custom_tags`.
///
/// # Errors
/// `MetadataError::IoError` (`NotFound`) if the file does not exist.
fn read_vorbis_comments<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
    let path = file_path.as_ref();
    tracing::debug!("Reading Vorbis comments from FLAC/OGG file: {:?}", path);
    if !path.exists() {
        let missing = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("File not found: {:?}", path),
        );
        return Err(MetadataError::IoError(missing));
    }

    let mut metadata = AudioMetadata::default();
    let tags = &mut metadata.custom_tags;
    if let Ok(file_metadata) = std::fs::metadata(path) {
        tags.insert("file_size".to_string(), file_metadata.len().to_string());
    }

    let extension = path
        .extension()
        .map(|ext| ext.to_string_lossy().to_lowercase());
    let format = match extension.as_deref() {
        Some("flac") => "FLAC",
        Some("ogg" | "oga") => "OGG Vorbis",
        _ => "Unknown Vorbis-based format",
    };
    tags.insert("format".to_string(), format.to_string());
    tags.insert(
        "note".to_string(),
        "Vorbis comment reading requires additional dependencies (metaflac, lewton)"
            .to_string(),
    );
    Ok(metadata)
}
/// Placeholder Opus reader: it does not parse Opus tags. It records
/// `file_size`, `format`, `metadata_format` and an explanatory `note` in
/// `custom_tags`.
///
/// # Errors
/// `MetadataError::IoError` (`NotFound`) if the file does not exist.
fn read_opus_tags<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
    let path = file_path.as_ref();
    tracing::debug!("Reading Opus tags from file: {:?}", path);
    if !path.exists() {
        let missing = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("File not found: {:?}", path),
        );
        return Err(MetadataError::IoError(missing));
    }

    let mut metadata = AudioMetadata::default();
    if let Ok(file_metadata) = std::fs::metadata(path) {
        metadata
            .custom_tags
            .insert("file_size".to_string(), file_metadata.len().to_string());
    }
    let fixed_tags = [
        ("format", "Opus"),
        (
            "note",
            "Opus tag reading requires additional dependencies (opus crate)",
        ),
        ("metadata_format", "Vorbis Comment-style"),
    ];
    metadata.custom_tags.extend(
        fixed_tags
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string())),
    );
    Ok(metadata)
}
fn read_wav_metadata<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
let path = file_path.as_ref();
match hound::WavReader::open(path) {
Ok(reader) => {
let spec = reader.spec();
let duration = reader.duration() as f64 / spec.sample_rate as f64;
let mut metadata = AudioMetadata::default();
metadata.duration = Some(duration);
metadata
.custom_tags
.insert("sample_rate".to_string(), spec.sample_rate.to_string());
metadata
.custom_tags
.insert("channels".to_string(), spec.channels.to_string());
metadata.custom_tags.insert(
"bits_per_sample".to_string(),
spec.bits_per_sample.to_string(),
);
metadata.custom_tags.insert(
"sample_format".to_string(),
format!("{:?}", spec.sample_format),
);
if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) {
if filename.contains("voirs") || filename.contains("synthesis") {
metadata
.synthesis_params
.insert("generator".to_string(), "VoiRS".to_string());
metadata.comment = Some("Generated by VoiRS speech synthesis".to_string());
metadata.genre = Some("Speech".to_string());
}
}
Ok(metadata)
}
Err(e) => Err(MetadataError::IoError(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("Failed to read WAV file: {}", e),
))),
}
}
}
impl AudioMetadata {
/// Returns `true` when the metadata marks the audio as synthesized: either a
/// `voice` synthesis parameter is present or the `SYNTHESIS_ENGINE` custom
/// tag equals `VoiRS`.
pub fn is_synthesized(&self) -> bool {
    if self.synthesis_params.contains_key("voice") {
        return true;
    }
    matches!(
        self.custom_tags.get("SYNTHESIS_ENGINE").map(String::as_str),
        Some("VoiRS")
    )
}
/// Returns the `original_text` synthesis parameter, if one was recorded.
pub fn get_original_text(&self) -> Option<&String> {
    let params = &self.synthesis_params;
    params.get("original_text")
}
/// Renders the synthesis parameters as `key: value` pairs joined by `, `.
///
/// Pairs are listed in sorted key order so the result is stable between
/// calls and runs (plain `HashMap` iteration order is unspecified). Returns
/// `"No synthesis parameters"` when the map is empty.
pub fn format_synthesis_params(&self) -> String {
    if self.synthesis_params.is_empty() {
        return "No synthesis parameters".to_string();
    }
    let mut entries: Vec<_> = self.synthesis_params.iter().collect();
    entries.sort_unstable();
    entries
        .iter()
        .map(|(key, value)| format!("{}: {}", key, value))
        .collect::<Vec<_>>()
        .join(", ")
}
/// Overlays `other` onto `self`.
///
/// Every optional field that is `Some` in `other` replaces the value in
/// `self`; fields that are `None` in `other` leave `self` untouched. Entries
/// of `synthesis_params` and `custom_tags` are copied over, overwriting
/// entries with the same key.
///
/// This now also covers `voice_name`, `text_source`, `model_version`,
/// `language` and `date`, which were previously ignored.
pub fn merge(&mut self, other: &AudioMetadata) {
    if let Some(title) = &other.title {
        self.title = Some(title.clone());
    }
    if let Some(artist) = &other.artist {
        self.artist = Some(artist.clone());
    }
    if let Some(album) = &other.album {
        self.album = Some(album.clone());
    }
    if other.track.is_some() {
        self.track = other.track;
    }
    if other.year.is_some() {
        self.year = other.year;
    }
    if let Some(genre) = &other.genre {
        self.genre = Some(genre.clone());
    }
    if let Some(comment) = &other.comment {
        self.comment = Some(comment.clone());
    }
    if other.duration.is_some() {
        self.duration = other.duration;
    }
    if let Some(art) = &other.album_art {
        self.album_art = Some(art.clone());
    }
    if let Some(voice_name) = &other.voice_name {
        self.voice_name = Some(voice_name.clone());
    }
    if let Some(text_source) = &other.text_source {
        self.text_source = Some(text_source.clone());
    }
    if let Some(model_version) = &other.model_version {
        self.model_version = Some(model_version.clone());
    }
    if let Some(language) = &other.language {
        self.language = Some(language.clone());
    }
    if let Some(date) = &other.date {
        self.date = Some(date.clone());
    }
    self.synthesis_params.extend(
        other
            .synthesis_params
            .iter()
            .map(|(key, value)| (key.clone(), value.clone())),
    );
    self.custom_tags.extend(
        other
            .custom_tags
            .iter()
            .map(|(key, value)| (key.clone(), value.clone())),
    );
}
}
impl AlbumArt {
/// Loads a picture from disk and guesses its MIME type from the file
/// extension.
///
/// The extension is compared case-insensitively, so `cover.JPG` is
/// recognised just like `cover.jpg`. Unknown or missing extensions yield
/// `application/octet-stream`.
///
/// # Errors
/// `MetadataError::IoError` if the file cannot be read.
pub fn from_file<P: AsRef<Path>>(
    file_path: P,
    description: String,
    picture_type: PictureType,
) -> Result<Self, MetadataError> {
    let path = file_path.as_ref();
    let data = std::fs::read(path).map_err(MetadataError::IoError)?;
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);
    let mime_type = match extension.as_deref() {
        Some("jpg" | "jpeg") => "image/jpeg",
        Some("png") => "image/png",
        Some("gif") => "image/gif",
        Some("bmp") => "image/bmp",
        _ => "application/octet-stream",
    }
    .to_string();
    Ok(AlbumArt {
        data,
        mime_type,
        description,
        picture_type,
    })
}
pub fn from_data(
data: Vec<u8>,
mime_type: String,
description: String,
picture_type: PictureType,
) -> Self {
AlbumArt {
data,
mime_type,
description,
picture_type,
}
}
pub fn get_dimensions(&self) -> Option<(u32, u32)> {
match self.mime_type.as_str() {
"image/jpeg" => self.parse_jpeg_dimensions(),
"image/png" => self.parse_png_dimensions(),
"image/gif" => self.parse_gif_dimensions(),
"image/bmp" => self.parse_bmp_dimensions(),
_ => None,
}
}
/// Scans JPEG segments for a start-of-frame marker and returns
/// `(width, height)` from it.
///
/// Returns `None` if the data is shorter than 10 bytes, does not start with
/// the `FF D8` marker, or no start-of-frame segment is found before the scan
/// stops (at a byte that is not `FF` or near the end of the data).
fn parse_jpeg_dimensions(&self) -> Option<(u32, u32)> {
    let bytes = self.data.as_slice();
    if bytes.len() < 10 || bytes[0..2] != [0xFF, 0xD8] {
        return None;
    }
    // Start-of-frame markers: C0..CF except C4, C8 and CC.
    let is_start_of_frame = |marker: u8| {
        matches!(
            marker,
            0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF
        )
    };

    let mut pos = 2;
    while pos + 8 < bytes.len() && bytes[pos] == 0xFF {
        let marker = bytes[pos + 1];
        // `segment` points at the 2-byte big-endian segment length.
        let segment = pos + 2;
        if is_start_of_frame(marker) && segment + 5 < bytes.len() {
            // Layout after the length: precision (1), height (2), width (2).
            let height = u32::from(u16::from_be_bytes([bytes[segment + 3], bytes[segment + 4]]));
            let width = u32::from(u16::from_be_bytes([bytes[segment + 5], bytes[segment + 6]]));
            return Some((width, height));
        }
        if segment + 1 >= bytes.len() {
            break;
        }
        let length = usize::from(u16::from_be_bytes([bytes[segment], bytes[segment + 1]]));
        pos = segment + length;
    }
    None
}
/// Reads `(width, height)` as big-endian `u32`s from bytes 16..24, provided
/// the data starts with the 8-byte PNG signature and bytes 12..16 spell
/// `IHDR`. Returns `None` otherwise or when fewer than 24 bytes are present.
fn parse_png_dimensions(&self) -> Option<(u32, u32)> {
    const PNG_SIGNATURE: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];

    let bytes = self.data.as_slice();
    if bytes.len() < 24 {
        return None;
    }
    if bytes[0..8] != PNG_SIGNATURE {
        return None;
    }
    if &bytes[12..16] != b"IHDR" {
        return None;
    }
    let be_u32 =
        |at: usize| u32::from_be_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]]);
    Some((be_u32(16), be_u32(20)))
}
/// Reads `(width, height)` as little-endian `u16`s from bytes 6..10 of a
/// GIF87a/GIF89a file. Returns `None` if fewer than 10 bytes are present or
/// the signature does not match.
fn parse_gif_dimensions(&self) -> Option<(u32, u32)> {
    let header = self.data.get(..10)?;
    let is_gif = header.starts_with(b"GIF87a") || header.starts_with(b"GIF89a");
    if !is_gif {
        return None;
    }
    let le_u16 = |at: usize| u32::from(u16::from_le_bytes([header[at], header[at + 1]]));
    Some((le_u16(6), le_u16(8)))
}
/// Reads `(width, height)` from a BMP header, taking both values as signed
/// little-endian 32-bit integers at offsets 18 and 22.
///
/// BMP stores a negative height for top-down bitmaps; the absolute value is
/// returned so such images no longer report a height near `u32::MAX`.
/// Returns `None` if fewer than 26 bytes are present, the `BM` signature is
/// missing, or the stored width is negative.
///
/// NOTE(review): this assumes the 40-byte-or-larger info header layout; the
/// legacy 12-byte core header uses 16-bit dimensions and is not detected.
fn parse_bmp_dimensions(&self) -> Option<(u32, u32)> {
    let bytes = self.data.as_slice();
    if bytes.len() < 26 || !bytes.starts_with(b"BM") {
        return None;
    }
    let le_i32 =
        |at: usize| i32::from_le_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]]);
    let width = u32::try_from(le_i32(18)).ok()?;
    let height = le_i32(22).unsigned_abs();
    Some((width, height))
}
}
/// Errors produced while reading or writing audio metadata.
#[derive(Debug, thiserror::Error)]
pub enum MetadataError {
/// An underlying file-system operation failed; also used to wrap
/// "file not found" and WAV-open failures.
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// The audio format is not supported.
#[error("Unsupported format: {0}")]
UnsupportedFormat(String),
/// The metadata itself is not acceptable.
#[error("Invalid metadata: {0}")]
InvalidMetadata(String),
/// The file contents do not match the expected container layout
/// (e.g. a short ID3 header or a non-RIFF WAV file).
#[error("Invalid format: {0}")]
InvalidFormat(String),
/// A value could not be encoded.
#[error("Encoding error: {0}")]
EncodingError(String),
}
/// Convenience constructor: builds the metadata for one synthesis run by
/// combining `MetadataWriter::for_synthesis` with `add_synthesis_metadata`,
/// and records `duration` when one is supplied.
pub fn create_synthesis_metadata(
    text: &str,
    voice: &str,
    quality: &str,
    rate: f32,
    pitch: f32,
    volume: f32,
    duration: Option<f64>,
) -> AudioMetadata {
    let base = MetadataWriter::for_synthesis()
        .add_synthesis_metadata(text, voice, quality, rate, pitch, volume);
    match duration {
        Some(seconds) => base.duration(seconds).metadata,
        None => base.metadata,
    }
}
/// Reads the file's metadata and returns a copy of its `original_text`
/// synthesis parameter, or `None` when the reader did not produce one.
///
/// # Errors
/// Propagates errors from `MetadataReader::read_from_file`.
pub fn extract_synthesis_text<P: AsRef<Path>>(
    file_path: P,
    format: AudioFormat,
) -> Result<Option<String>, MetadataError> {
    MetadataReader::read_from_file(file_path, format)
        .map(|metadata| metadata.get_original_text().cloned())
}
/// Reads the file's metadata and reports whether it is marked as synthesized
/// (see `AudioMetadata::is_synthesized`).
///
/// # Errors
/// Propagates errors from `MetadataReader::read_from_file`.
pub fn is_voirs_generated<P: AsRef<Path>>(
    file_path: P,
    format: AudioFormat,
) -> Result<bool, MetadataError> {
    MetadataReader::read_from_file(file_path, format)
        .map(|metadata| metadata.is_synthesized())
}
impl Default for MetadataWriter {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the metadata builder, helpers and image-header parsers.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
// Builds a 44-byte WAV image: RIFF/WAVE header, a 16-byte PCM `fmt ` chunk
// (mono, 44100 Hz, 16-bit) and an empty `data` chunk.
fn create_minimal_wav_file() -> Vec<u8> {
let mut wav_data = Vec::new();
wav_data.extend_from_slice(b"RIFF");
wav_data.extend_from_slice(&(36u32).to_le_bytes()); wav_data.extend_from_slice(b"WAVE");
wav_data.extend_from_slice(b"fmt ");
wav_data.extend_from_slice(&(16u32).to_le_bytes()); wav_data.extend_from_slice(&(1u16).to_le_bytes()); wav_data.extend_from_slice(&(1u16).to_le_bytes()); wav_data.extend_from_slice(&(44100u32).to_le_bytes()); wav_data.extend_from_slice(&(88200u32).to_le_bytes()); wav_data.extend_from_slice(&(2u16).to_le_bytes()); wav_data.extend_from_slice(&(16u16).to_le_bytes());
wav_data.extend_from_slice(b"data");
wav_data.extend_from_slice(&(0u32).to_le_bytes());
wav_data
}
// The chained builder setters store their values in the metadata.
#[test]
fn test_metadata_creation() {
let metadata = MetadataWriter::new()
.title("Test Title")
.artist("Test Artist")
.album("Test Album")
.track(1)
.year(2024)
.genre("Speech")
.comment("Test comment")
.duration(10.5)
.metadata;
assert_eq!(metadata.title, Some("Test Title".to_string()));
assert_eq!(metadata.artist, Some("Test Artist".to_string()));
assert_eq!(metadata.track, Some(1));
assert_eq!(metadata.duration, Some(10.5));
}
// `create_synthesis_metadata` marks the result as synthesized and keeps
// the original text, voice parameter and duration.
#[test]
fn test_synthesis_metadata() {
let metadata = create_synthesis_metadata(
"Hello, world!",
"en-us-female",
"high",
1.0,
0.0,
0.0,
Some(5.2),
);
assert!(metadata.is_synthesized());
assert_eq!(
metadata.get_original_text(),
Some(&"Hello, world!".to_string())
);
assert!(metadata.synthesis_params.contains_key("voice"));
assert_eq!(metadata.duration, Some(5.2));
}
// `AlbumArt::from_data` stores its arguments unchanged.
#[test]
fn test_album_art_creation() {
let art_data = vec![0xFF, 0xD8, 0xFF, 0xE0]; let album_art = AlbumArt::from_data(
art_data.clone(),
"image/jpeg".to_string(),
"Test cover".to_string(),
PictureType::CoverFront,
);
assert_eq!(album_art.data, art_data);
assert_eq!(album_art.mime_type, "image/jpeg");
assert_eq!(album_art.picture_type, PictureType::CoverFront);
}
// `merge` keeps fields missing from the other side, overwrites fields
// present on both sides, and adopts fields only the other side has.
#[test]
fn test_metadata_merge() {
let mut metadata1 = AudioMetadata::default();
metadata1.title = Some("Title 1".to_string());
metadata1.artist = Some("Artist 1".to_string());
let mut metadata2 = AudioMetadata::default();
metadata2.artist = Some("Artist 2".to_string());
metadata2.album = Some("Album 2".to_string());
metadata1.merge(&metadata2);
assert_eq!(metadata1.title, Some("Title 1".to_string())); assert_eq!(metadata1.artist, Some("Artist 2".to_string())); assert_eq!(metadata1.album, Some("Album 2".to_string())); }
// Formatted parameters contain each `key: value` pair; order is not checked.
#[test]
fn test_synthesis_params_formatting() {
let mut metadata = AudioMetadata::default();
metadata
.synthesis_params
.insert("voice".to_string(), "en-us-female".to_string());
metadata
.synthesis_params
.insert("quality".to_string(), "high".to_string());
let formatted = metadata.format_synthesis_params();
assert!(formatted.contains("voice: en-us-female"));
assert!(formatted.contains("quality: high"));
}
// Writing WAV metadata to a minimal file reports success.
// NOTE(review): only the `Ok` result is checked, not the bytes written.
#[test]
fn test_metadata_writer_file_operations() {
let mut temp_file = NamedTempFile::new().unwrap();
let minimal_wav = create_minimal_wav_file();
temp_file.write_all(&minimal_wav).unwrap();
temp_file.flush().unwrap();
let metadata_writer = MetadataWriter::for_synthesis();
let result = metadata_writer.write_to_file(temp_file.path(), AudioFormat::Wav);
assert!(result.is_ok());
}
// Header parsing for PNG, GIF and BMP, plus the unknown-MIME fallback.
#[test]
fn test_album_art_image_dimensions() {
// PNG signature, IHDR length + type, then width = 8 and height = 8.
let png_data = vec![
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, ];
let png_art = AlbumArt::from_data(
png_data,
"image/png".to_string(),
"Test PNG".to_string(),
PictureType::CoverFront,
);
assert_eq!(png_art.get_dimensions(), Some((8, 8)));
// "GIF87a" followed by little-endian width = 16 and height = 24.
let gif_data = vec![
0x47, 0x49, 0x46, 0x38, 0x37, 0x61, 0x10, 0x00, 0x18, 0x00, ];
let gif_art = AlbumArt::from_data(
gif_data,
"image/gif".to_string(),
"Test GIF".to_string(),
PictureType::CoverFront,
);
assert_eq!(gif_art.get_dimensions(), Some((16, 24)));
// Zeroed 26-byte BMP header with signature and 32x32 dimensions patched in.
let mut bmp_data = vec![0; 26];
bmp_data[0] = b'B';
bmp_data[1] = b'M';
bmp_data[18..22].copy_from_slice(&(32u32).to_le_bytes());
bmp_data[22..26].copy_from_slice(&(32u32).to_le_bytes());
let bmp_art = AlbumArt::from_data(
bmp_data,
"image/bmp".to_string(),
"Test BMP".to_string(),
PictureType::CoverFront,
);
assert_eq!(bmp_art.get_dimensions(), Some((32, 32)));
// An unrecognised MIME type yields no dimensions.
let invalid_art = AlbumArt::from_data(
vec![0xFF, 0xFF, 0xFF],
"image/unknown".to_string(),
"Invalid".to_string(),
PictureType::Other,
);
assert_eq!(invalid_art.get_dimensions(), None);
}
// Synchsafe encode/decode round-trips values around the 7-bit boundaries.
#[test]
fn test_synchsafe_integer_encoding() {
let test_values = [0, 127, 128, 16383, 16384, 2097151];
for &value in &test_values {
let encoded = MetadataWriter::encode_synchsafe_int(value);
let decoded = MetadataWriter::decode_synchsafe_int(&encoded).unwrap();
assert_eq!(value, decoded, "Failed for value: {}", value);
}
}
// An INFO field starts with its id, then the value's byte length, then the value.
#[test]
fn test_wav_info_field_creation() {
let writer = MetadataWriter::new();
let field = writer.create_wav_info_field("INAM", "Test Title");
assert_eq!(&field[0..4], b"INAM");
let size = u32::from_le_bytes([field[4], field[5], field[6], field[7]]);
assert_eq!(size, 10); assert_eq!(&field[8..18], b"Test Title");
}
}