1use std::collections::HashMap;
7use std::path::Path;
8use voirs_sdk::AudioFormat;
9
/// Tag data for an audio file, including VoiRS synthesis details.
///
/// `Default` yields a value with every `Option` set to `None` and both maps empty.
#[derive(Debug, Clone, Default)]
pub struct AudioMetadata {
    /// Track title (written as ID3 `TIT2`, WAV `INAM`, or `TITLE=`).
    pub title: Option<String>,
    /// Artist (written as ID3 `TPE1`, WAV `IART`, or `ARTIST=`).
    pub artist: Option<String>,
    /// Album (written as ID3 `TALB`, WAV `IPRD`, or `ALBUM=`).
    pub album: Option<String>,
    /// Track number; not emitted by any writer in this file.
    pub track: Option<u32>,
    /// Year; not emitted by any writer in this file.
    pub year: Option<u32>,
    /// Genre (written as ID3 `TCON`, WAV `IGNR`, or `GENRE=`).
    pub genre: Option<String>,
    /// Free-form comment (written as ID3 `COMM`, WAV `ICMT`, or `COMMENT=`).
    pub comment: Option<String>,
    /// Duration; the WAV reader fills this with sample count / sample rate, i.e. seconds.
    pub duration: Option<f64>,
    /// Synthesis settings keyed by name (e.g. `voice`, `quality`, `rate`, `original_text`).
    pub synthesis_params: HashMap<String, String>,
    /// Embedded picture; not emitted by any writer in this file.
    pub album_art: Option<AlbumArt>,
    /// Arbitrary extra key/value tags.
    pub custom_tags: HashMap<String, String>,
    /// Voice name (written as an ID3 `TXXX` frame and WAV `ISFT`).
    pub voice_name: Option<String>,
    /// Source of the synthesized text (written as an ID3 `TXXX` frame).
    pub text_source: Option<String>,
    /// Model version (written as an ID3 `TXXX` frame).
    pub model_version: Option<String>,
    /// Language (written as ID3 `TLAN`).
    pub language: Option<String>,
    /// Date string (written as ID3 `TDRC` and WAV `ICRD`); format is not validated here.
    pub date: Option<String>,
}
46
/// An image attached to an audio file's metadata.
#[derive(Debug, Clone)]
pub struct AlbumArt {
    /// Raw image bytes.
    pub data: Vec<u8>,
    /// MIME type; `get_dimensions` dispatches on this string.
    pub mime_type: String,
    /// Human-readable description of the picture.
    pub description: String,
    /// What the picture depicts.
    pub picture_type: PictureType,
}
59
/// Role of an attached picture.
///
/// NOTE(review): the variant names and their order look like the ID3v2 `APIC`
/// picture-type table — confirm before relying on discriminant values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PictureType {
    Other,
    Icon,
    OtherIcon,
    CoverFront,
    CoverBack,
    Leaflet,
    Media,
    LeadArtist,
    Artist,
    Conductor,
    Band,
    Composer,
    Lyricist,
    RecordingLocation,
    DuringRecording,
    DuringPerformance,
    MovieScreenCapture,
    ColouredFish,
    Illustration,
    BandLogo,
    PublisherLogo,
}
85
/// Builder that accumulates [`AudioMetadata`] and writes it to audio files.
pub struct MetadataWriter {
    /// Metadata collected so far through the builder methods.
    metadata: AudioMetadata,
}
90
91impl MetadataWriter {
92 pub fn new() -> Self {
94 Self {
95 metadata: AudioMetadata::default(),
96 }
97 }
98
99 pub fn for_synthesis() -> Self {
101 let mut metadata = AudioMetadata::default();
102 metadata.artist = Some("VoiRS".to_string());
103 metadata.comment = Some("Generated by VoiRS Text-to-Speech".to_string());
104 metadata
105 .custom_tags
106 .insert("ENCODER".to_string(), "VoiRS CLI".to_string());
107 metadata.custom_tags.insert(
108 "SOFTWARE".to_string(),
109 env!("CARGO_PKG_VERSION").to_string(),
110 );
111
112 Self { metadata }
113 }
114
115 pub fn title<S: Into<String>>(mut self, title: S) -> Self {
117 self.metadata.title = Some(title.into());
118 self
119 }
120
121 pub fn artist<S: Into<String>>(mut self, artist: S) -> Self {
123 self.metadata.artist = Some(artist.into());
124 self
125 }
126
127 pub fn album<S: Into<String>>(mut self, album: S) -> Self {
129 self.metadata.album = Some(album.into());
130 self
131 }
132
133 pub fn track(mut self, track: u32) -> Self {
135 self.metadata.track = Some(track);
136 self
137 }
138
139 pub fn year(mut self, year: u32) -> Self {
141 self.metadata.year = Some(year);
142 self
143 }
144
145 pub fn genre<S: Into<String>>(mut self, genre: S) -> Self {
147 self.metadata.genre = Some(genre.into());
148 self
149 }
150
151 pub fn comment<S: Into<String>>(mut self, comment: S) -> Self {
153 self.metadata.comment = Some(comment.into());
154 self
155 }
156
157 pub fn duration(mut self, duration: f64) -> Self {
159 self.metadata.duration = Some(duration);
160 self
161 }
162
163 pub fn synthesis_param<S: Into<String>>(mut self, key: S, value: S) -> Self {
165 self.metadata
166 .synthesis_params
167 .insert(key.into(), value.into());
168 self
169 }
170
171 pub fn custom_tag<S: Into<String>>(mut self, key: S, value: S) -> Self {
173 self.metadata.custom_tags.insert(key.into(), value.into());
174 self
175 }
176
177 pub fn album_art(mut self, art: AlbumArt) -> Self {
179 self.metadata.album_art = Some(art);
180 self
181 }
182
183 pub fn add_synthesis_metadata(
185 mut self,
186 text: &str,
187 voice: &str,
188 quality: &str,
189 rate: f32,
190 pitch: f32,
191 volume: f32,
192 ) -> Self {
193 let title = if text.len() > 100 {
195 format!("{}...", &text[..97])
196 } else {
197 text.to_string()
198 };
199 self.metadata.title = Some(title);
200
201 self.metadata
203 .synthesis_params
204 .insert("voice".to_string(), voice.to_string());
205 self.metadata
206 .synthesis_params
207 .insert("quality".to_string(), quality.to_string());
208 self.metadata
209 .synthesis_params
210 .insert("rate".to_string(), rate.to_string());
211 self.metadata
212 .synthesis_params
213 .insert("pitch".to_string(), pitch.to_string());
214 self.metadata
215 .synthesis_params
216 .insert("volume".to_string(), volume.to_string());
217 self.metadata
218 .synthesis_params
219 .insert("original_text".to_string(), text.to_string());
220
221 self.metadata
223 .custom_tags
224 .insert("SYNTHESIS_ENGINE".to_string(), "VoiRS".to_string());
225 self.metadata
226 .custom_tags
227 .insert("VOICE_MODEL".to_string(), voice.to_string());
228 self.metadata.custom_tags.insert(
229 "GENERATION_DATE".to_string(),
230 chrono::Utc::now()
231 .format("%Y-%m-%d %H:%M:%S UTC")
232 .to_string(),
233 );
234
235 self
236 }
237
238 pub fn write_to_file<P: AsRef<Path>>(
240 &self,
241 file_path: P,
242 format: AudioFormat,
243 ) -> Result<(), MetadataError> {
244 match format {
245 AudioFormat::Mp3 => self.write_id3_tags(file_path),
246 AudioFormat::Flac => self.write_vorbis_comments(file_path),
247 AudioFormat::Ogg => self.write_vorbis_comments(file_path),
248 AudioFormat::Opus => self.write_opus_tags(file_path),
249 AudioFormat::Wav => self.write_wav_metadata(file_path),
250 }
251 }
252
253 fn write_id3_tags<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
255 let file_path = file_path.as_ref();
256 tracing::info!("Writing ID3 tags to MP3 file: {}", file_path.display());
257
258 let mut file_content = std::fs::read(file_path).map_err(MetadataError::IoError)?;
263
264 let id3_tag = self.create_id3_tag_bytes()?;
266
267 if file_content.len() >= 10 && &file_content[0..3] == b"ID3" {
269 let existing_tag_size = self.parse_id3_tag_size(&file_content[0..10])?;
271 let total_existing_size = 10 + existing_tag_size; file_content = file_content[total_existing_size..].to_vec();
275 }
276
277 let mut new_content = Vec::new();
279 new_content.extend_from_slice(&id3_tag);
280 new_content.extend_from_slice(&file_content);
281
282 std::fs::write(file_path, new_content).map_err(MetadataError::IoError)?;
284
285 tracing::info!("Successfully wrote ID3 tags to {}", file_path.display());
286 Ok(())
287 }
288
289 fn create_id3_tag_bytes(&self) -> Result<Vec<u8>, MetadataError> {
291 let mut tag_data = Vec::new();
292
293 if let Some(title) = &self.metadata.title {
295 tag_data.extend_from_slice(&self.create_id3_frame("TIT2", title)?);
296 }
297 if let Some(artist) = &self.metadata.artist {
298 tag_data.extend_from_slice(&self.create_id3_frame("TPE1", artist)?);
299 }
300 if let Some(album) = &self.metadata.album {
301 tag_data.extend_from_slice(&self.create_id3_frame("TALB", album)?);
302 }
303 if let Some(genre) = &self.metadata.genre {
304 tag_data.extend_from_slice(&self.create_id3_frame("TCON", genre)?);
305 }
306 if let Some(date) = &self.metadata.date {
307 tag_data.extend_from_slice(&self.create_id3_frame("TDRC", date)?);
308 }
309 if let Some(comment) = &self.metadata.comment {
310 tag_data.extend_from_slice(&self.create_id3_frame("COMM", comment)?);
311 }
312
313 if let Some(voice_name) = &self.metadata.voice_name {
315 tag_data.extend_from_slice(
316 &self.create_id3_frame("TXXX", &format!("VoiRS_Voice={}", voice_name))?,
317 );
318 }
319 if let Some(text_source) = &self.metadata.text_source {
320 tag_data.extend_from_slice(
321 &self.create_id3_frame("TXXX", &format!("VoiRS_TextSource={}", text_source))?,
322 );
323 }
324 if !self.metadata.synthesis_params.is_empty() {
325 let params_str = self
326 .metadata
327 .synthesis_params
328 .iter()
329 .map(|(k, v)| format!("{}={}", k, v))
330 .collect::<Vec<_>>()
331 .join(";");
332 tag_data.extend_from_slice(
333 &self.create_id3_frame("TXXX", &format!("VoiRS_SynthesisParams={}", params_str))?,
334 );
335 }
336 if let Some(model_version) = &self.metadata.model_version {
337 tag_data.extend_from_slice(
338 &self.create_id3_frame("TXXX", &format!("VoiRS_ModelVersion={}", model_version))?,
339 );
340 }
341 if let Some(language) = &self.metadata.language {
342 tag_data.extend_from_slice(&self.create_id3_frame("TLAN", language)?);
343 }
344
345 let mut header = Vec::new();
347 header.extend_from_slice(b"ID3"); header.push(0x04); header.push(0x00); header.push(0x00); let tag_size = tag_data.len() as u32;
354 header.extend_from_slice(&Self::encode_synchsafe_int(tag_size));
355
356 let mut result = Vec::new();
358 result.extend_from_slice(&header);
359 result.extend_from_slice(&tag_data);
360
361 Ok(result)
362 }
363
364 fn create_id3_frame(&self, frame_id: &str, content: &str) -> Result<Vec<u8>, MetadataError> {
366 let mut frame = Vec::new();
367
368 frame.extend_from_slice(frame_id.as_bytes());
370
371 let mut frame_content = Vec::new();
373 frame_content.push(0x03); frame_content.extend_from_slice(content.as_bytes());
375
376 let frame_size = frame_content.len() as u32;
378 frame.extend_from_slice(&Self::encode_synchsafe_int(frame_size));
379
380 frame.push(0x00);
382 frame.push(0x00);
383
384 frame.extend_from_slice(&frame_content);
386
387 Ok(frame)
388 }
389
390 fn parse_id3_tag_size(&self, header: &[u8]) -> Result<usize, MetadataError> {
392 if header.len() < 10 {
393 return Err(MetadataError::InvalidFormat(
394 "ID3 header too short".to_string(),
395 ));
396 }
397
398 let size_bytes = &header[6..10];
400 let size = Self::decode_synchsafe_int(size_bytes)?;
401
402 Ok(size as usize)
403 }
404
405 fn encode_synchsafe_int(value: u32) -> [u8; 4] {
407 [
408 ((value >> 21) & 0x7F) as u8,
409 ((value >> 14) & 0x7F) as u8,
410 ((value >> 7) & 0x7F) as u8,
411 (value & 0x7F) as u8,
412 ]
413 }
414
415 fn decode_synchsafe_int(bytes: &[u8]) -> Result<u32, MetadataError> {
417 if bytes.len() < 4 {
418 return Err(MetadataError::InvalidFormat(
419 "Invalid synchsafe integer".to_string(),
420 ));
421 }
422
423 let value = ((bytes[0] as u32) << 21)
424 | ((bytes[1] as u32) << 14)
425 | ((bytes[2] as u32) << 7)
426 | (bytes[3] as u32);
427
428 Ok(value)
429 }
430
431 fn write_vorbis_comments<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
433 let path = file_path.as_ref();
434 tracing::info!("Writing Vorbis comments to FLAC/OGG file: {:?}", path);
435
436 if !path.exists() {
438 return Err(MetadataError::IoError(std::io::Error::new(
439 std::io::ErrorKind::NotFound,
440 format!("File not found: {:?}", path),
441 )));
442 }
443
444 let mut metadata_path = path.to_path_buf();
447 metadata_path.set_extension("vorbis_comments.txt");
448
449 let mut content = String::new();
450 content.push_str("# Vorbis Comments for FLAC/OGG file\n");
451 content.push_str(&format!("# Original file: {:?}\n\n", path));
452
453 if let Some(title) = &self.metadata.title {
454 content.push_str(&format!("TITLE={}\n", title));
455 }
456 if let Some(artist) = &self.metadata.artist {
457 content.push_str(&format!("ARTIST={}\n", artist));
458 }
459 if let Some(album) = &self.metadata.album {
460 content.push_str(&format!("ALBUM={}\n", album));
461 }
462 if let Some(comment) = &self.metadata.comment {
463 content.push_str(&format!("COMMENT={}\n", comment));
464 }
465 if let Some(genre) = &self.metadata.genre {
466 content.push_str(&format!("GENRE={}\n", genre));
467 }
468
469 std::fs::write(&metadata_path, content).map_err(MetadataError::IoError)?;
470
471 tracing::info!(
472 "Vorbis comments written to companion file: {:?}",
473 metadata_path
474 );
475 Ok(())
476 }
477
478 fn write_opus_tags<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
480 let path = file_path.as_ref();
481 tracing::info!("Writing Opus tags to file: {:?}", path);
482
483 if !path.exists() {
485 return Err(MetadataError::IoError(std::io::Error::new(
486 std::io::ErrorKind::NotFound,
487 format!("File not found: {:?}", path),
488 )));
489 }
490
491 let mut metadata_path = path.to_path_buf();
494 metadata_path.set_extension("opus_tags.txt");
495
496 let mut content = String::new();
497 content.push_str("# Opus Tags (Vorbis Comment format)\n");
498 content.push_str(&format!("# Original file: {:?}\n\n", path));
499
500 if let Some(title) = &self.metadata.title {
501 content.push_str(&format!("TITLE={}\n", title));
502 }
503 if let Some(artist) = &self.metadata.artist {
504 content.push_str(&format!("ARTIST={}\n", artist));
505 }
506 if let Some(album) = &self.metadata.album {
507 content.push_str(&format!("ALBUM={}\n", album));
508 }
509 if let Some(comment) = &self.metadata.comment {
510 content.push_str(&format!("COMMENT={}\n", comment));
511 }
512 if let Some(genre) = &self.metadata.genre {
513 content.push_str(&format!("GENRE={}\n", genre));
514 }
515
516 std::fs::write(&metadata_path, content).map_err(MetadataError::IoError)?;
517
518 tracing::info!("Opus tags written to companion file: {:?}", metadata_path);
519 Ok(())
520 }
521
522 fn write_wav_metadata<P: AsRef<Path>>(&self, file_path: P) -> Result<(), MetadataError> {
524 let path = file_path.as_ref();
525 tracing::info!("Writing WAV INFO chunk metadata to: {}", path.display());
526
527 let mut file_content = std::fs::read(path).map_err(MetadataError::IoError)?;
529
530 if file_content.len() < 12 {
531 return Err(MetadataError::InvalidFormat(
532 "WAV file too small".to_string(),
533 ));
534 }
535
536 if &file_content[0..4] != b"RIFF" || &file_content[8..12] != b"WAVE" {
538 return Err(MetadataError::InvalidFormat(
539 "Not a valid WAV file".to_string(),
540 ));
541 }
542
543 let info_chunk = self.create_wav_info_chunk()?;
545
546 let mut offset = 12; let mut fmt_found = false;
549 let mut data_end = None;
550
551 while offset + 8 <= file_content.len() {
552 let chunk_id = &file_content[offset..offset + 4];
553 let chunk_size = u32::from_le_bytes([
554 file_content[offset + 4],
555 file_content[offset + 5],
556 file_content[offset + 6],
557 file_content[offset + 7],
558 ]) as usize;
559
560 if chunk_id == b"fmt " {
561 fmt_found = true;
562 } else if chunk_id == b"data" && fmt_found {
563 data_end = Some(offset + 8 + chunk_size);
564 break;
565 }
566
567 offset += 8 + chunk_size;
568 if chunk_size % 2 != 0 {
570 offset += 1;
571 }
572 }
573
574 let insert_point = data_end.unwrap_or(file_content.len());
575
576 let mut cleaned_content = Vec::new();
578 let mut scan_offset = 12;
579
580 while scan_offset + 8 <= file_content.len() {
581 let chunk_id = &file_content[scan_offset..scan_offset + 4];
582 let chunk_size = u32::from_le_bytes([
583 file_content[scan_offset + 4],
584 file_content[scan_offset + 5],
585 file_content[scan_offset + 6],
586 file_content[scan_offset + 7],
587 ]) as usize;
588
589 if chunk_id == b"LIST"
590 && scan_offset + 12 <= file_content.len()
591 && &file_content[scan_offset + 8..scan_offset + 12] == b"INFO"
592 {
593 scan_offset += 8 + chunk_size;
595 if chunk_size % 2 != 0 {
596 scan_offset += 1;
597 }
598 continue;
599 }
600
601 let chunk_end = scan_offset + 8 + chunk_size;
603 if chunk_size % 2 != 0 && chunk_end < file_content.len() {
604 cleaned_content.extend_from_slice(&file_content[scan_offset..chunk_end + 1]);
605 scan_offset = chunk_end + 1;
606 } else {
607 cleaned_content.extend_from_slice(&file_content[scan_offset..chunk_end]);
608 scan_offset = chunk_end;
609 }
610 }
611
612 if scan_offset < file_content.len() {
614 cleaned_content.extend_from_slice(&file_content[scan_offset..]);
615 }
616
617 let mut new_content = Vec::new();
619 let actual_insert = if insert_point <= cleaned_content.len() {
620 insert_point
621 } else {
622 cleaned_content.len()
623 };
624
625 new_content.extend_from_slice(&cleaned_content[..actual_insert]);
626 new_content.extend_from_slice(&info_chunk);
627 if actual_insert < cleaned_content.len() {
628 new_content.extend_from_slice(&cleaned_content[actual_insert..]);
629 }
630
631 let total_size = new_content.len() - 8;
633 new_content[4..8].copy_from_slice(&(total_size as u32).to_le_bytes());
634
635 std::fs::write(path, new_content).map_err(MetadataError::IoError)?;
637
638 tracing::info!("Successfully wrote WAV INFO chunk to {}", path.display());
639 Ok(())
640 }
641
642 fn create_wav_info_chunk(&self) -> Result<Vec<u8>, MetadataError> {
644 let mut info_data = Vec::new();
645
646 if let Some(title) = &self.metadata.title {
648 info_data.extend_from_slice(&self.create_wav_info_field("INAM", title));
649 }
650 if let Some(artist) = &self.metadata.artist {
651 info_data.extend_from_slice(&self.create_wav_info_field("IART", artist));
652 }
653 if let Some(album) = &self.metadata.album {
654 info_data.extend_from_slice(&self.create_wav_info_field("IPRD", album));
655 }
656 if let Some(comment) = &self.metadata.comment {
657 info_data.extend_from_slice(&self.create_wav_info_field("ICMT", comment));
658 }
659 if let Some(genre) = &self.metadata.genre {
660 info_data.extend_from_slice(&self.create_wav_info_field("IGNR", genre));
661 }
662 if let Some(date) = &self.metadata.date {
663 info_data.extend_from_slice(&self.create_wav_info_field("ICRD", date));
664 }
665
666 if let Some(voice_name) = &self.metadata.voice_name {
668 info_data.extend_from_slice(
669 &self.create_wav_info_field("ISFT", &format!("VoiRS Voice: {}", voice_name)),
670 );
671 }
672 if !self.metadata.synthesis_params.is_empty() {
673 let params_str = self
674 .metadata
675 .synthesis_params
676 .iter()
677 .map(|(k, v)| format!("{}={}", k, v))
678 .collect::<Vec<_>>()
679 .join("; ");
680 info_data.extend_from_slice(&self.create_wav_info_field("ISRC", ¶ms_str));
681 }
682
683 let mut chunk = Vec::new();
685 chunk.extend_from_slice(b"LIST"); chunk.extend_from_slice(&((info_data.len() + 4) as u32).to_le_bytes()); chunk.extend_from_slice(b"INFO"); chunk.extend_from_slice(&info_data); if chunk.len() % 2 != 0 {
692 chunk.push(0);
693 }
694
695 Ok(chunk)
696 }
697
698 fn create_wav_info_field(&self, field_id: &str, value: &str) -> Vec<u8> {
700 let mut field = Vec::new();
701 field.extend_from_slice(field_id.as_bytes()); let value_bytes = value.as_bytes();
704 field.extend_from_slice(&(value_bytes.len() as u32).to_le_bytes()); field.extend_from_slice(value_bytes); if value_bytes.len() % 2 != 0 {
709 field.push(0);
710 }
711
712 field
713 }
714}
715
/// Stateless entry point for reading [`AudioMetadata`] back from audio files.
pub struct MetadataReader;
718
719impl MetadataReader {
720 pub fn read_from_file<P: AsRef<Path>>(
722 file_path: P,
723 format: AudioFormat,
724 ) -> Result<AudioMetadata, MetadataError> {
725 match format {
726 AudioFormat::Mp3 => Self::read_id3_tags(file_path),
727 AudioFormat::Flac => Self::read_vorbis_comments(file_path),
728 AudioFormat::Ogg => Self::read_vorbis_comments(file_path),
729 AudioFormat::Opus => Self::read_opus_tags(file_path),
730 AudioFormat::Wav => Self::read_wav_metadata(file_path),
731 }
732 }
733
734 fn read_id3_tags<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
736 let path = file_path.as_ref();
737 tracing::debug!("Reading ID3 tags from MP3 file: {:?}", path);
738
739 if !path.exists() {
741 return Err(MetadataError::IoError(std::io::Error::new(
742 std::io::ErrorKind::NotFound,
743 format!("File not found: {:?}", path),
744 )));
745 }
746
747 let mut metadata = AudioMetadata::default();
749
750 if let Ok(file_metadata) = std::fs::metadata(path) {
752 metadata
753 .custom_tags
754 .insert("file_size".to_string(), file_metadata.len().to_string());
755
756 if let Ok(modified) = file_metadata.modified() {
757 if let Ok(datetime) = modified.duration_since(std::time::UNIX_EPOCH) {
758 metadata
759 .custom_tags
760 .insert("modified_time".to_string(), datetime.as_secs().to_string());
761 }
762 }
763 }
764
765 metadata
766 .custom_tags
767 .insert("format".to_string(), "MP3".to_string());
768 metadata.custom_tags.insert(
769 "note".to_string(),
770 "ID3 tag reading requires additional dependencies".to_string(),
771 );
772
773 Ok(metadata)
774 }
775
776 fn read_vorbis_comments<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
778 let path = file_path.as_ref();
779 tracing::debug!("Reading Vorbis comments from FLAC/OGG file: {:?}", path);
780
781 if !path.exists() {
783 return Err(MetadataError::IoError(std::io::Error::new(
784 std::io::ErrorKind::NotFound,
785 format!("File not found: {:?}", path),
786 )));
787 }
788
789 let mut metadata = AudioMetadata::default();
790
791 if let Ok(file_metadata) = std::fs::metadata(path) {
793 metadata
794 .custom_tags
795 .insert("file_size".to_string(), file_metadata.len().to_string());
796 }
797
798 let format = if let Some(ext) = path.extension() {
800 match ext.to_string_lossy().to_lowercase().as_str() {
801 "flac" => "FLAC",
802 "ogg" | "oga" => "OGG Vorbis",
803 _ => "Unknown Vorbis-based format",
804 }
805 } else {
806 "Unknown Vorbis-based format"
807 };
808
809 metadata
810 .custom_tags
811 .insert("format".to_string(), format.to_string());
812 metadata.custom_tags.insert(
813 "note".to_string(),
814 "Vorbis comment reading requires additional dependencies (metaflac, lewton)"
815 .to_string(),
816 );
817
818 Ok(metadata)
819 }
820
821 fn read_opus_tags<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
823 let path = file_path.as_ref();
824 tracing::debug!("Reading Opus tags from file: {:?}", path);
825
826 if !path.exists() {
828 return Err(MetadataError::IoError(std::io::Error::new(
829 std::io::ErrorKind::NotFound,
830 format!("File not found: {:?}", path),
831 )));
832 }
833
834 let mut metadata = AudioMetadata::default();
835
836 if let Ok(file_metadata) = std::fs::metadata(path) {
838 metadata
839 .custom_tags
840 .insert("file_size".to_string(), file_metadata.len().to_string());
841 }
842
843 metadata
844 .custom_tags
845 .insert("format".to_string(), "Opus".to_string());
846 metadata.custom_tags.insert(
847 "note".to_string(),
848 "Opus tag reading requires additional dependencies (opus crate)".to_string(),
849 );
850 metadata.custom_tags.insert(
851 "metadata_format".to_string(),
852 "Vorbis Comment-style".to_string(),
853 );
854
855 Ok(metadata)
856 }
857
858 fn read_wav_metadata<P: AsRef<Path>>(file_path: P) -> Result<AudioMetadata, MetadataError> {
860 let path = file_path.as_ref();
861
862 match hound::WavReader::open(path) {
864 Ok(reader) => {
865 let spec = reader.spec();
866 let duration = reader.duration() as f64 / spec.sample_rate as f64;
867
868 let mut metadata = AudioMetadata::default();
869 metadata.duration = Some(duration);
870
871 metadata
873 .custom_tags
874 .insert("sample_rate".to_string(), spec.sample_rate.to_string());
875 metadata
876 .custom_tags
877 .insert("channels".to_string(), spec.channels.to_string());
878 metadata.custom_tags.insert(
879 "bits_per_sample".to_string(),
880 spec.bits_per_sample.to_string(),
881 );
882 metadata.custom_tags.insert(
883 "sample_format".to_string(),
884 format!("{:?}", spec.sample_format),
885 );
886
887 if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) {
889 if filename.contains("voirs") || filename.contains("synthesis") {
890 metadata
891 .synthesis_params
892 .insert("generator".to_string(), "VoiRS".to_string());
893 metadata.comment = Some("Generated by VoiRS speech synthesis".to_string());
894 metadata.genre = Some("Speech".to_string());
895 }
896 }
897
898 Ok(metadata)
899 }
900 Err(e) => Err(MetadataError::IoError(std::io::Error::new(
901 std::io::ErrorKind::InvalidData,
902 format!("Failed to read WAV file: {}", e),
903 ))),
904 }
905 }
906}
907
908impl AudioMetadata {
910 pub fn is_synthesized(&self) -> bool {
912 self.synthesis_params.contains_key("voice")
913 || self
914 .custom_tags
915 .get("SYNTHESIS_ENGINE")
916 .is_some_and(|v| v == "VoiRS")
917 }
918
919 pub fn get_original_text(&self) -> Option<&String> {
921 self.synthesis_params.get("original_text")
922 }
923
924 pub fn format_synthesis_params(&self) -> String {
926 if self.synthesis_params.is_empty() {
927 return "No synthesis parameters".to_string();
928 }
929
930 let mut params = Vec::new();
931 for (key, value) in &self.synthesis_params {
932 params.push(format!("{}: {}", key, value));
933 }
934 params.join(", ")
935 }
936
937 pub fn merge(&mut self, other: &AudioMetadata) {
939 if other.title.is_some() {
940 self.title = other.title.clone();
941 }
942 if other.artist.is_some() {
943 self.artist = other.artist.clone();
944 }
945 if other.album.is_some() {
946 self.album = other.album.clone();
947 }
948 if other.track.is_some() {
949 self.track = other.track;
950 }
951 if other.year.is_some() {
952 self.year = other.year;
953 }
954 if other.genre.is_some() {
955 self.genre = other.genre.clone();
956 }
957 if other.comment.is_some() {
958 self.comment = other.comment.clone();
959 }
960 if other.duration.is_some() {
961 self.duration = other.duration;
962 }
963 if other.album_art.is_some() {
964 self.album_art = other.album_art.clone();
965 }
966
967 for (key, value) in &other.synthesis_params {
969 self.synthesis_params.insert(key.clone(), value.clone());
970 }
971 for (key, value) in &other.custom_tags {
972 self.custom_tags.insert(key.clone(), value.clone());
973 }
974 }
975}
976
977impl AlbumArt {
978 pub fn from_file<P: AsRef<Path>>(
980 file_path: P,
981 description: String,
982 picture_type: PictureType,
983 ) -> Result<Self, MetadataError> {
984 let path = file_path.as_ref();
985 let data = std::fs::read(path).map_err(MetadataError::IoError)?;
986
987 let mime_type = match path.extension().and_then(|ext| ext.to_str()) {
988 Some("jpg") | Some("jpeg") => "image/jpeg".to_string(),
989 Some("png") => "image/png".to_string(),
990 Some("gif") => "image/gif".to_string(),
991 Some("bmp") => "image/bmp".to_string(),
992 _ => "application/octet-stream".to_string(),
993 };
994
995 Ok(AlbumArt {
996 data,
997 mime_type,
998 description,
999 picture_type,
1000 })
1001 }
1002
1003 pub fn from_data(
1005 data: Vec<u8>,
1006 mime_type: String,
1007 description: String,
1008 picture_type: PictureType,
1009 ) -> Self {
1010 AlbumArt {
1011 data,
1012 mime_type,
1013 description,
1014 picture_type,
1015 }
1016 }
1017
1018 pub fn get_dimensions(&self) -> Option<(u32, u32)> {
1020 match self.mime_type.as_str() {
1022 "image/jpeg" => self.parse_jpeg_dimensions(),
1023 "image/png" => self.parse_png_dimensions(),
1024 "image/gif" => self.parse_gif_dimensions(),
1025 "image/bmp" => self.parse_bmp_dimensions(),
1026 _ => None,
1027 }
1028 }
1029
1030 fn parse_jpeg_dimensions(&self) -> Option<(u32, u32)> {
1032 if self.data.len() < 10 {
1033 return None;
1034 }
1035
1036 if self.data[0..2] != [0xFF, 0xD8] {
1038 return None;
1039 }
1040
1041 let mut offset = 2;
1042 while offset + 8 < self.data.len() {
1043 if self.data[offset] != 0xFF {
1044 break;
1045 }
1046
1047 let marker = self.data[offset + 1];
1048 offset += 2;
1049
1050 if ((0xC0..=0xC3).contains(&marker)
1052 || (0xC5..=0xC7).contains(&marker)
1053 || (0xC9..=0xCB).contains(&marker)
1054 || (0xCD..=0xCF).contains(&marker))
1055 && offset + 5 < self.data.len()
1056 {
1057 let height =
1058 u16::from_be_bytes([self.data[offset + 3], self.data[offset + 4]]) as u32;
1059 let width =
1060 u16::from_be_bytes([self.data[offset + 5], self.data[offset + 6]]) as u32;
1061 return Some((width, height));
1062 }
1063
1064 if offset + 1 < self.data.len() {
1066 let length =
1067 u16::from_be_bytes([self.data[offset], self.data[offset + 1]]) as usize;
1068 offset += length;
1069 } else {
1070 break;
1071 }
1072 }
1073 None
1074 }
1075
1076 fn parse_png_dimensions(&self) -> Option<(u32, u32)> {
1078 if self.data.len() < 24 {
1079 return None;
1080 }
1081
1082 if self.data[0..8] != [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] {
1084 return None;
1085 }
1086
1087 if &self.data[12..16] == b"IHDR" {
1089 let width =
1090 u32::from_be_bytes([self.data[16], self.data[17], self.data[18], self.data[19]]);
1091 let height =
1092 u32::from_be_bytes([self.data[20], self.data[21], self.data[22], self.data[23]]);
1093 return Some((width, height));
1094 }
1095 None
1096 }
1097
1098 fn parse_gif_dimensions(&self) -> Option<(u32, u32)> {
1100 if self.data.len() < 10 {
1101 return None;
1102 }
1103
1104 if &self.data[0..6] != b"GIF87a" && &self.data[0..6] != b"GIF89a" {
1106 return None;
1107 }
1108
1109 let width = u16::from_le_bytes([self.data[6], self.data[7]]) as u32;
1110 let height = u16::from_le_bytes([self.data[8], self.data[9]]) as u32;
1111 Some((width, height))
1112 }
1113
1114 fn parse_bmp_dimensions(&self) -> Option<(u32, u32)> {
1116 if self.data.len() < 26 {
1117 return None;
1118 }
1119
1120 if &self.data[0..2] != b"BM" {
1122 return None;
1123 }
1124
1125 let width =
1127 u32::from_le_bytes([self.data[18], self.data[19], self.data[20], self.data[21]]);
1128 let height =
1129 u32::from_le_bytes([self.data[22], self.data[23], self.data[24], self.data[25]]);
1130 Some((width, height))
1131 }
1132}
1133
/// Errors produced while reading or writing audio metadata.
#[derive(Debug, thiserror::Error)]
pub enum MetadataError {
    /// Underlying file-system failure; `?` converts `std::io::Error` into this.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// The requested audio format is not handled.
    #[error("Unsupported format: {0}")]
    UnsupportedFormat(String),

    /// Metadata values are not acceptable.
    #[error("Invalid metadata: {0}")]
    InvalidMetadata(String),

    /// File contents do not match the expected container layout
    /// (e.g. short ID3 header, missing RIFF/WAVE magic).
    #[error("Invalid format: {0}")]
    InvalidFormat(String),

    /// A value could not be encoded into the target format.
    #[error("Encoding error: {0}")]
    EncodingError(String),
}
1152
1153pub fn create_synthesis_metadata(
1157 text: &str,
1158 voice: &str,
1159 quality: &str,
1160 rate: f32,
1161 pitch: f32,
1162 volume: f32,
1163 duration: Option<f64>,
1164) -> AudioMetadata {
1165 let mut writer = MetadataWriter::for_synthesis()
1166 .add_synthesis_metadata(text, voice, quality, rate, pitch, volume);
1167
1168 if let Some(duration) = duration {
1169 writer = writer.duration(duration);
1170 }
1171
1172 writer.metadata
1173}
1174
1175pub fn extract_synthesis_text<P: AsRef<Path>>(
1177 file_path: P,
1178 format: AudioFormat,
1179) -> Result<Option<String>, MetadataError> {
1180 let metadata = MetadataReader::read_from_file(file_path, format)?;
1181 Ok(metadata.get_original_text().cloned())
1182}
1183
1184pub fn is_voirs_generated<P: AsRef<Path>>(
1186 file_path: P,
1187 format: AudioFormat,
1188) -> Result<bool, MetadataError> {
1189 let metadata = MetadataReader::read_from_file(file_path, format)?;
1190 Ok(metadata.is_synthesized())
1191}
1192
1193impl Default for MetadataWriter {
1194 fn default() -> Self {
1195 Self::new()
1196 }
1197}
1198
// Unit tests for the metadata builder, merge logic, image-header parsing,
// synchsafe integers, and WAV INFO field layout.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Builds a 44-byte WAV: RIFF header, 16-byte PCM `fmt ` chunk, empty `data` chunk.
    fn create_minimal_wav_file() -> Vec<u8> {
        let mut wav_data = Vec::new();

        // RIFF header; 36 is the declared size of everything after the size field.
        wav_data.extend_from_slice(b"RIFF");
        wav_data.extend_from_slice(&(36u32).to_le_bytes());
        wav_data.extend_from_slice(b"WAVE");

        // fmt chunk fields in order: chunk size, format tag, channels, sample
        // rate, byte rate, block align, bits per sample.
        wav_data.extend_from_slice(b"fmt ");
        wav_data.extend_from_slice(&(16u32).to_le_bytes());
        wav_data.extend_from_slice(&(1u16).to_le_bytes());
        wav_data.extend_from_slice(&(1u16).to_le_bytes());
        wav_data.extend_from_slice(&(44100u32).to_le_bytes());
        wav_data.extend_from_slice(&(88200u32).to_le_bytes());
        wav_data.extend_from_slice(&(2u16).to_le_bytes());
        wav_data.extend_from_slice(&(16u16).to_le_bytes());

        // data chunk with zero bytes of audio.
        wav_data.extend_from_slice(b"data");
        wav_data.extend_from_slice(&(0u32).to_le_bytes());
        wav_data
    }

    // Builder setters store the values they are given.
    #[test]
    fn test_metadata_creation() {
        let metadata = MetadataWriter::new()
            .title("Test Title")
            .artist("Test Artist")
            .album("Test Album")
            .track(1)
            .year(2024)
            .genre("Speech")
            .comment("Test comment")
            .duration(10.5)
            .metadata;

        assert_eq!(metadata.title, Some("Test Title".to_string()));
        assert_eq!(metadata.artist, Some("Test Artist".to_string()));
        assert_eq!(metadata.track, Some(1));
        assert_eq!(metadata.duration, Some(10.5));
    }

    // The convenience constructor marks metadata as synthesized and keeps the text.
    #[test]
    fn test_synthesis_metadata() {
        let metadata = create_synthesis_metadata(
            "Hello, world!",
            "en-us-female",
            "high",
            1.0,
            0.0,
            0.0,
            Some(5.2),
        );

        assert!(metadata.is_synthesized());
        assert_eq!(
            metadata.get_original_text(),
            Some(&"Hello, world!".to_string())
        );
        assert!(metadata.synthesis_params.contains_key("voice"));
        assert_eq!(metadata.duration, Some(5.2));
    }

    // `from_data` stores its arguments unchanged.
    #[test]
    fn test_album_art_creation() {
        // First four bytes of a JPEG stream; only used as opaque data here.
        let art_data = vec![0xFF, 0xD8, 0xFF, 0xE0];
        let album_art = AlbumArt::from_data(
            art_data.clone(),
            "image/jpeg".to_string(),
            "Test cover".to_string(),
            PictureType::CoverFront,
        );

        assert_eq!(album_art.data, art_data);
        assert_eq!(album_art.mime_type, "image/jpeg");
        assert_eq!(album_art.picture_type, PictureType::CoverFront);
    }

    // Merging keeps fields missing from `other`, overrides shared ones, adds new ones.
    #[test]
    fn test_metadata_merge() {
        let mut metadata1 = AudioMetadata::default();
        metadata1.title = Some("Title 1".to_string());
        metadata1.artist = Some("Artist 1".to_string());

        let mut metadata2 = AudioMetadata::default();
        metadata2.artist = Some("Artist 2".to_string());
        metadata2.album = Some("Album 2".to_string());

        metadata1.merge(&metadata2);

        // Kept: metadata2 has no title.
        assert_eq!(metadata1.title, Some("Title 1".to_string()));
        // Overridden by metadata2.
        assert_eq!(metadata1.artist, Some("Artist 2".to_string()));
        // Added from metadata2.
        assert_eq!(metadata1.album, Some("Album 2".to_string()));
    }

    // Formatting includes every parameter; ordering is not asserted.
    #[test]
    fn test_synthesis_params_formatting() {
        let mut metadata = AudioMetadata::default();
        metadata
            .synthesis_params
            .insert("voice".to_string(), "en-us-female".to_string());
        metadata
            .synthesis_params
            .insert("quality".to_string(), "high".to_string());

        let formatted = metadata.format_synthesis_params();
        assert!(formatted.contains("voice: en-us-female"));
        assert!(formatted.contains("quality: high"));
    }

    // Writing WAV metadata to a minimal file succeeds; the output bytes are not inspected.
    #[test]
    fn test_metadata_writer_file_operations() {
        let mut temp_file = NamedTempFile::new().unwrap();

        let minimal_wav = create_minimal_wav_file();
        temp_file.write_all(&minimal_wav).unwrap();
        temp_file.flush().unwrap();

        let metadata_writer = MetadataWriter::for_synthesis();

        let result = metadata_writer.write_to_file(temp_file.path(), AudioFormat::Wav);
        assert!(result.is_ok());
    }

    // Dimension parsing for PNG, GIF, and BMP headers, plus an unknown MIME type.
    #[test]
    fn test_album_art_image_dimensions() {
        // PNG signature, IHDR chunk length and type, then width = 8, height = 8.
        let png_data = vec![
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48,
            0x44, 0x52, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08,
        ];
        let png_art = AlbumArt::from_data(
            png_data,
            "image/png".to_string(),
            "Test PNG".to_string(),
            PictureType::CoverFront,
        );
        assert_eq!(png_art.get_dimensions(), Some((8, 8)));

        // "GIF87a", then little-endian width = 16 and height = 24.
        let gif_data = vec![
            0x47, 0x49, 0x46, 0x38, 0x37, 0x61, 0x10, 0x00, 0x18, 0x00,
        ];
        let gif_art = AlbumArt::from_data(
            gif_data,
            "image/gif".to_string(),
            "Test GIF".to_string(),
            PictureType::CoverFront,
        );
        assert_eq!(gif_art.get_dimensions(), Some((16, 24)));

        // Zero-filled BMP header with the magic and 32x32 dimensions filled in.
        let mut bmp_data = vec![0; 26];
        bmp_data[0] = b'B';
        bmp_data[1] = b'M';
        bmp_data[18..22].copy_from_slice(&(32u32).to_le_bytes());
        bmp_data[22..26].copy_from_slice(&(32u32).to_le_bytes());

        let bmp_art = AlbumArt::from_data(
            bmp_data,
            "image/bmp".to_string(),
            "Test BMP".to_string(),
            PictureType::CoverFront,
        );
        assert_eq!(bmp_art.get_dimensions(), Some((32, 32)));

        // An unrecognised MIME type yields no dimensions.
        let invalid_art = AlbumArt::from_data(
            vec![0xFF, 0xFF, 0xFF],
            "image/unknown".to_string(),
            "Invalid".to_string(),
            PictureType::Other,
        );
        assert_eq!(invalid_art.get_dimensions(), None);
    }

    // Encode/decode round trip at the 7-bit group boundaries.
    #[test]
    fn test_synchsafe_integer_encoding() {
        let test_values = [0, 127, 128, 16383, 16384, 2097151];

        for &value in &test_values {
            let encoded = MetadataWriter::encode_synchsafe_int(value);
            let decoded = MetadataWriter::decode_synchsafe_int(&encoded).unwrap();
            assert_eq!(value, decoded, "Failed for value: {}", value);
        }
    }

    // INFO field layout: 4-byte ID, little-endian length, then the value bytes.
    #[test]
    fn test_wav_info_field_creation() {
        let writer = MetadataWriter::new();
        let field = writer.create_wav_info_field("INAM", "Test Title");

        assert_eq!(&field[0..4], b"INAM");
        let size = u32::from_le_bytes([field[4], field[5], field[6], field[7]]);
        // "Test Title" is 10 bytes long.
        assert_eq!(size, 10);
        assert_eq!(&field[8..18], b"Test Title");
    }
}