music_metadata/
lib.rs

// If a frame has a variable-length description field and that field is empty in the frame, it is set to String::from("null").
2mod flac;
3mod id3;
4mod ogg;
5mod util;
6
7use std::collections::HashMap;
8use std::ffi::OsString;
9use std::fs;
10use std::fs::File;
11use std::io;
12use std::path::Path;
13
14use flac::blocks::{
15    block_application::BlockApplication,
16    block_cue_sheet::BlockCueSheet,
17    block_header::{BlockHeader, BlockType},
18    block_picture::BlockPicture,
19    block_seektable::BlockSeekTable,
20    block_stream_info::BlockStreamInfo,
21    block_vorbis_comment::BlockVorbisComment,
22};
23use flac::core::parse_block_cue_sheet;
24use flac::flac_buffer_reader::FlacBufferReader;
25use id3::{
26    core::{
27        parse_extended_header, parse_footer_buffer, parse_frame_header, parse_frame_payload,
28        parse_protocol_header,
29    },
30    frames::APIC::PicType,
31};
32use id3::{
33    error::ID3Error, extended_header::ExtendedHeader, footer::Footer, frames::common::Tape,
34    id3_buffer_reader::ID3BufferReader, id3v1_tag::ID3v1, protocol_header::ProtocolHeader,
35};
36
37use ogg::{
38    ogg_buffer_reader::OggBufferReader,
39    ogg_vorbis_comment::{CommentBody, CommentHeader, HeaderType},
40    page::PageHeader,
41};
42use util::{parse_4_bytes_with_little_endian, Buffer};
43
44use flac::core::{
45    parse_block_application, parse_block_header, parse_block_picture, parse_block_seektable,
46    parse_flac_marker, parse_stream_info_block, parse_vorbis_comment,
47};
48
49pub struct ID3Parser<T>
50where
51    T: AsRef<Path>,
52{
53    fp: T,
54    hm: HashMap<String, usize>,
55    /// Some frames appear more than once
56    frames: Vec<Vec<Box<dyn Tape>>>,
57    /// protocol header
58    pub pheader: ProtocolHeader,
59    /// extended header
60    pub eheader: ExtendedHeader,
61    /// sum of extended header (including payload), frames, padding
62    pub padding_size: u32,
63
64    pub footer: Footer,
65    /// ID3v1 tag
66    pub id3v1: ID3v1,
67    /// file size, for locating ID3v1
68    file_size: u64,
69}
70
71impl<T> ID3Parser<T>
72where
73    T: AsRef<Path>,
74{
75    /// Create a new parser.
76    /// 传入一个列表,启动多个线程进行解析:怎么返回值?线程切换成本?
77    /// 用户启动多个线程,每个线程一个ID3Parser
78    /// 异步?对已经确定的Buffer
79    pub fn new(fp: T) -> io::Result<Self> {
80        let file_size = File::open(&fp)?.metadata()?.len();
81        Ok(ID3Parser {
82            fp,
83            hm: HashMap::default(),
84            frames: Vec::default(),
85            pheader: ProtocolHeader::default(),
86            eheader: ExtendedHeader::default(),
87            padding_size: u32::default(),
88            footer: Footer::default(),
89            id3v1: ID3v1::default(),
90            file_size,
91        })
92    }
93
94    /// Return frame content that after decoding.
95    ///
96    /// All text information frames should call this method, including TXXX.
97    ///
98    /// This method is case insensitive.
99    pub fn get(&self, query: &str) -> Option<Vec<String>> {
100        let upper_query = query.to_uppercase();
101        if let Some(index) = self.hm.get(&upper_query) {
102            let mut rst = Vec::default();
103            for d in self.frames[*index].iter() {
104                rst.push(d.message());
105            }
106            Some(rst)
107        } else {
108            None
109        }
110    }
111
112    /// Return raw data without decoding.
113    ///
114    /// APIC should call this method, as should SYLT.
115    ///
116    /// SYLT may call the `get` method in the future.
117    ///
118    /// This method is case insensitive.
119    pub fn get_raw(&self, query: &str) -> Option<Vec<Vec<u8>>> {
120        let upper_query = query.to_uppercase();
121        if let Some(index) = self.hm.get(&upper_query) {
122            let mut rst = Vec::default();
123            for d in self.frames[*index].iter() {
124                rst.push(d.raw());
125            }
126            Some(rst)
127        } else {
128            None
129        }
130    }
131
132    /// Push a frame to self.frames.
133    fn push(&mut self, v: Box<dyn Tape>) -> io::Result<()> {
134        if let Some(index) = self.hm.get(&v.identifier()) {
135            self.frames[*index].push(v);
136        } else {
137            let index = self.frames.len();
138            self.hm.insert(v.identifier(), index);
139            self.frames.push(Vec::default());
140            self.frames[index].push(v);
141        }
142        Ok(())
143    }
144
145    /// Start parsing id3v1.
146    ///
147    /// It is not recommended to call this method,
148    ///
149    /// thinking that the ID3 protocol contains very little information,
150    ///
151    /// unless a very old song.
152    pub fn parse_id3v1(&mut self) -> io::Result<()> {
153        let position = self.file_size - 128;
154        let mut buffer_reader = ID3BufferReader::new(&self.fp)?;
155        buffer_reader.seek_to(position)?;
156        let buffer = buffer_reader.read_id3v1_buffer()?;
157        let mut start: usize = 0;
158        let header: Vec<u8> = (buffer[start..start + 3]).to_vec();
159        // update_start_end()
160        start += 3;
161        let title: Vec<u8> = (buffer[start..start + 30]).to_vec();
162        start += 30;
163        let artist: Vec<u8> = (buffer[start..start + 30]).to_vec();
164        start += 30;
165        let album: Vec<u8> = (buffer[start..start + 30]).to_vec();
166        start += 30;
167        let year: Vec<u8> = (buffer[start..start + 4]).to_vec();
168        start += 4;
169        let comment: Vec<u8> = (buffer[start..start + 30]).to_vec();
170        start += 30;
171        let genre: u8 = buffer[start];
172        self.id3v1 = ID3v1::new(header, title, artist, album, year, comment, genre);
173        Ok(())
174    }
175
176    /// Start parsing id3v2.
177    pub fn parse_id3v2(&mut self) -> io::Result<()> {
178        let mut buffer_reader = ID3BufferReader::new(&self.fp)?;
179
180        let mut buffer: Buffer;
181
182        buffer = buffer_reader.read_protocol_header_buffer()?;
183        let rst = parse_protocol_header(&buffer);
184        if rst.is_err() {
185            println!("not include ID3v2.3 or ID3v2.4");
186            return Ok(());
187        }
188        self.pheader = rst.unwrap();
189        let mut start: u32 = 0;
190        if self.pheader.flags.ExtendedHeader {
191            buffer = buffer_reader.read_extended_header_buffer()?;
192            let mut ext = parse_extended_header(&buffer, &self.pheader.major_version);
193            ext.payload = buffer_reader.skip(ext.len.into())?;
194            self.eheader = ext;
195            start += 10 + self.eheader.len as u32;
196        }
197
198        while start < self.pheader.size {
199            buffer = buffer_reader.read_frame_header_buffer()?;
200            match parse_frame_header(&buffer, &self.pheader.major_version) {
201                Ok(v) => {
202                    // 这里可以优化为异步
203                    // 去解析而不等待返回值,接着获取下一个FrameHeader继续解析
204                    buffer = buffer_reader.read_frame_payload_buffer(v.size)?;
205                    // 优化为异步
206                    match parse_frame_payload(&buffer, &v) {
207                        Ok(v) => {
208                            self.push(v)?;
209                        }
210                        Err(e) => println!("{:?}", e),
211                    }
212                    start += 10 + v.size;
213                }
214                Err(e) => match e {
215                    ID3Error::IsPadding => {
216                        self.padding_size = self.pheader.size - start;
217                        if self.pheader.flags.Footer {
218                            // 将reader的指针定位到footer第一个字节
219                            buffer_reader.seek_to(10 + self.pheader.size as u64)?;
220                            buffer = buffer_reader.read_footer_buffer()?;
221                            self.footer = parse_footer_buffer(&buffer).unwrap();
222                        }
223                        return Ok(());
224                    }
225                    ID3Error::Unimplement(id, skip) => {
226                        let buf = buffer_reader.skip(skip)?;
227                        start += 10 + skip;
228                        println!(
229                            "unimplement: {{
230identifier: {},
231raw: {:?}",
232                            id, buf
233                        );
234                    }
235                    ID3Error::UnknownError(s) => {
236                        println!("{s}");
237                        println!("The parser is stopped");
238                        return Ok(());
239                    }
240                },
241            }
242        }
243        Ok(())
244    }
245
246    /// As the method says.
247    ///
248    /// In addition, its own data will be cleared.
249    pub fn change_target(&mut self, new_fp: T) {
250        self.fp = new_fp;
251        self.hm.clear();
252        self.frames.clear()
253    }
254
255    /// Write APIC frame's raw to the current directory named with filename.jpg like 云烟成雨.jpg if there is only one APIC frame.
256    ///
257    /// Unless, add a underline followd by a number after the filename start with the second one, like 云烟成雨_1.jpg.
258    pub fn write_image(&self) -> io::Result<()> {
259        let mut t = self.fp.as_ref().to_owned();
260        t.set_extension("");
261        if let Some(index) = self.hm.get("APIC") {
262            for (index, d) in self.frames[*index].iter().enumerate() {
263                let pic_type = PicType::from(d.raw().pop().unwrap()).to_string();
264                let mut fname: OsString = OsString::from(&t);
265                fname.push("_mp3_");
266                fname.push(pic_type);
267                if index > 0 {
268                    fname.push("_");
269                    fname.push(index.to_string());
270                }
271                fname.push(".jpg");
272                fs::write(fname, d.raw())?
273            }
274        } else {
275            println!("NO APIC");
276        }
277        Ok(())
278    }
279}
280
281// https://xiph.org/flac/format.html#metadata_block_vorbis_comment
282#[derive(Debug)]
283pub struct FlacParser<T>
284where
285    T: AsRef<Path>,
286{
287    fp: T,
288    pub stream_info: BlockStreamInfo,
289    pub application: BlockApplication,
290    pub seek_table: BlockSeekTable,
291    vorbis_comment: BlockVorbisComment,
292    pub picture: Vec<BlockPicture>,
293    pub cue_sheet: BlockCueSheet,
294    pub padding_length: u32,
295}
296
297#[allow(dead_code)]
298#[allow(unused_assignments)]
299#[allow(unused_variables)]
300impl<T> FlacParser<T>
301where
302    T: AsRef<Path>,
303{
304    /// Create a new FlacParser
305    pub fn new(fp: T) -> io::Result<Self> {
306        Ok(FlacParser {
307            fp,
308            stream_info: BlockStreamInfo::default(),
309            application: BlockApplication::default(),
310            seek_table: BlockSeekTable::default(),
311            vorbis_comment: BlockVorbisComment::default(),
312            picture: Vec::default(),
313            cue_sheet: BlockCueSheet::default(),
314            padding_length: u32::default(),
315        })
316    }
317
318    /// Start parsing flac.
319    pub fn parse(&mut self) -> io::Result<()> {
320        let mut buffer_reader = FlacBufferReader::new(&self.fp)?;
321        let mut buffer: Buffer;
322        buffer = buffer_reader.read_block_header()?;
323        if parse_flac_marker(buffer).is_err() {
324            println!("not include flac header");
325            return Ok(());
326        }
327        let mut block_header: BlockHeader = BlockHeader::default();
328        while !block_header.is_last {
329            buffer = buffer_reader.read_block_header()?;
330            block_header = parse_block_header(buffer)?;
331            buffer = buffer_reader.read_block_data_buffer(block_header.length)?;
332            match block_header.block_type {
333                BlockType::STREAMINFO => {
334                    self.stream_info = parse_stream_info_block(buffer)?;
335                }
336                BlockType::PADDING => {
337                    self.padding_length = block_header.length;
338                    println!("here is padding, is last = {}", block_header.is_last);
339                }
340                BlockType::APPLICATION => {
341                    self.application = parse_block_application(buffer)?;
342                }
343                BlockType::SEEKTABLE => {
344                    self.seek_table = parse_block_seektable(buffer)?;
345                }
346                BlockType::VORBISCOMMENT => {
347                    self.vorbis_comment = parse_vorbis_comment(buffer)?;
348                }
349                BlockType::CUESHEET => {
350                    self.cue_sheet = parse_block_cue_sheet(buffer)?;
351                }
352                BlockType::PICTURE => {
353                    self.picture.push(parse_block_picture(buffer)?);
354                }
355                BlockType::INVALID => todo!(),
356            }
357        }
358        Ok(())
359    }
360
361    /// Get vorbis comment according to query.
362    ///
363    /// Return a Vec<String> wrapped in an Option.
364    pub fn get(&mut self, query: &str) -> Option<Vec<String>> {
365        let upper_query = query.to_uppercase();
366        if let Some(index) = self.vorbis_comment.hm.get(&upper_query) {
367            return Some(self.vorbis_comment.comment[*index].clone());
368        }
369        None
370    }
371
372    /// Given that Vorbis allows for customized key values,
373    ///
374    /// there may be key values other than those in common use,
375    ///
376    /// so this method is provided to print all key-value pairs.
377    pub fn get_all(&mut self) -> io::Result<(Vec<String>, Vec<Vec<String>>)> {
378        let mut key_vec: Vec<String> = Vec::default();
379        let mut value_vec: Vec<Vec<String>> = Vec::default();
380        for (key, index) in &self.vorbis_comment.hm {
381            key_vec.push(key.to_string());
382            value_vec.push(self.vorbis_comment.comment[*index].clone());
383        }
384        Ok((key_vec, value_vec))
385    }
386
387    /// Write image(s) to disk.
388    pub fn write_image(&mut self) -> io::Result<()> {
389        let mut t = self.fp.as_ref().to_owned();
390        t.set_extension("");
391        let mut index = 0;
392        while index < self.picture.len() {
393            let mut raw_data = self.picture[index].data.clone();
394            let pic_type = PicType::from(raw_data.pop().unwrap()).to_string();
395            let mut fname: OsString = OsString::from(&t);
396            fname.push("_flac_");
397            fname.push(pic_type);
398            if index > 0 {
399                fname.push("_");
400                fname.push(index.to_string());
401            }
402            fname.push(".jpg");
403            fs::write(fname, raw_data)?;
404            index += 1;
405        }
406        Ok(())
407    }
408
409    /// As the method says.
410    ///
411    /// In addition, its own data will be cleared.
412    pub fn change_target(&mut self, new_fp: T) {
413        self.fp = new_fp;
414        self.application = BlockApplication::default();
415        self.stream_info = BlockStreamInfo::default();
416        self.seek_table = BlockSeekTable::default();
417        self.vorbis_comment = BlockVorbisComment::default();
418        self.picture.clear();
419        self.cue_sheet = BlockCueSheet::default();
420        self.padding_length = u32::default();
421    }
422}
423
424pub struct OggParser<T>
425where
426    T: AsRef<Path>,
427{
428    fp: T,
429    pub audio_channels: u8,
430    pub audio_sample_rate: u32,
431    pub vorbis_comment: CommentBody,
432    // pub vorbis_comment: Box<dyn FlagTrait>,
433    comment_buffer: Vec<u8>,
434}
435impl<T> OggParser<T>
436where
437    T: AsRef<Path>,
438{
439    pub fn new(fp: T) -> Self {
440        OggParser {
441            fp,
442            audio_channels: u8::default(),
443            audio_sample_rate: u32::default(),
444            // vorbis_comment: Box::new(OggVorbisComment::default()),
445            vorbis_comment: CommentBody::default(),
446            comment_buffer: Vec::default(),
447        }
448    }
449    pub fn parse(&mut self) -> io::Result<()> {
450        let mut buffer_reader = OggBufferReader::new(&self.fp)?;
451        let mut comment_header = CommentHeader::default();
452        // comment缓存
453        // let mut comment_buffer: Vec<u8> = Vec::default();
454        while !comment_header.end {
455            let mut page_header = PageHeader::default();
456            page_header.capture_pattern = String::from_utf8(buffer_reader.read_buffer(4)?).unwrap();
457            page_header.structure_version = buffer_reader.read_one()?;
458            let header_type_flag = buffer_reader.read_one()?;
459            page_header.new_packet = (header_type_flag & 0x01) == 0;
460            page_header.bos = (header_type_flag & 0x02) == 2;
461            page_header.eos = (header_type_flag & 0x04) == 4;
462            page_header.granule_position = buffer_reader.read_buffer(8)?;
463            page_header.serial_number = buffer_reader.read_buffer(4)?;
464            page_header.page_sequence_number = buffer_reader.read_buffer(4)?;
465            page_header.crc_checksum = buffer_reader.read_buffer(4)?;
466            page_header.number_page_segments = buffer_reader.read_one()?;
467            page_header.segment_table =
468                buffer_reader.read_buffer(page_header.number_page_segments as u32)?;
469            let temp = buffer_reader.read_one().unwrap();
470            match temp {
471                0x01 | 0x05 => {
472                    let skip_len: u64 = page_header
473                        .segment_table
474                        .clone()
475                        .into_iter()
476                        .map(|x| x as u64)
477                        .sum();
478                    buffer_reader.skip(skip_len - 1)?;
479                    continue;
480                }
481                _ => {}
482            }
483            let mut packets: Vec<u32> = Vec::new();
484            packets.push(0);
485            // 此页有几个包,包的长度是多少
486            for (i, ele) in page_header.segment_table.into_iter().enumerate() {
487                // 非0xFF
488                if ele != 0xFF {
489                    *packets.last_mut().unwrap() += ele as u32;
490                    if i != page_header.number_page_segments as usize - 1 {
491                        packets.push(0);
492                    }
493                } else {
494                    *packets.last_mut().unwrap() += 0xFF;
495                }
496            }
497            // 第一个包长度-1
498            packets[0] -= 0x01;
499            // 是一个新包,并且有区段,并且第一个字节是0x03
500            if page_header.new_packet && page_header.number_page_segments > 0 && temp == 0x03 {
501                // 将所有数据读入
502                for len in packets {
503                    self.comment_buffer
504                        .append(buffer_reader.read_buffer(len)?.as_mut());
505                }
506                // 进入下一页
507            } else {
508                // 是继承包
509                self.comment_buffer.push(temp);
510                for len in packets {
511                    self.comment_buffer
512                        .append(buffer_reader.read_buffer(len)?.as_mut());
513                    while *self.comment_buffer.last().unwrap() == 0x00 {
514                        self.comment_buffer.pop();
515                    }
516                    if *self.comment_buffer.last().unwrap() == 0x01 {
517                        self.comment_buffer.pop();
518                        comment_header.end = true;
519                        break;
520                    }
521                }
522            }
523        }
524        comment_header.header_type = HeaderType::CommentHeader;
525        comment_header.packet_pattern = String::from_utf8(self.vec_reader(0, 6).to_vec()).unwrap();
526        let company_info_length: usize =
527            parse_4_bytes_with_little_endian(self.vec_reader(6, 4)) as usize;
528        comment_header.company_info =
529            String::from_utf8(self.vec_reader(10, company_info_length).to_vec()).unwrap();
530        let mut start: usize = 10 + company_info_length + 4;
531        while self.comment_buffer.len() - start > 1 {
532            let comment_i_length: usize =
533                parse_4_bytes_with_little_endian(self.vec_reader(start, 4)) as usize;
534            start += 4;
535            let comment_i_content =
536                String::from_utf8(self.vec_reader(start, comment_i_length).to_vec()).unwrap();
537            start += comment_i_length;
538            // store(&mut self.vorbis_comment, comment_i_content);
539            // self.vorbis_comment.store(comment_i_content);
540            let kv: Vec<&str> = comment_i_content.split('=').collect();
541            let comment_key = kv[0];
542            let comment_value: String = kv[1].to_owned();
543            if let Some(index) = self.vorbis_comment.hm.get(comment_key) {
544                self.vorbis_comment.comment[*index].push(comment_value);
545            } else {
546                let comment_index = self.vorbis_comment.comment.len();
547                self.vorbis_comment
548                    .hm
549                    .insert(comment_key.to_uppercase(), comment_index);
550                self.vorbis_comment.comment.push(Vec::default());
551                self.vorbis_comment.comment[comment_index].push(comment_value);
552            }
553        }
554        Ok(())
555    }
556    // return buffer
557    fn vec_reader(&self, start: usize, length: usize) -> &[u8] {
558        &self.comment_buffer[start..start + length]
559    }
560    pub fn get(&mut self, query: &str) -> Option<Vec<String>> {
561        let upper_query = query.to_uppercase();
562        if let Some(index) = self.vorbis_comment.hm.get(&upper_query) {
563            return Some(self.vorbis_comment.comment[*index].clone());
564        }
565        None
566    }
567    pub fn get_all(&mut self) -> io::Result<(Vec<String>, Vec<Vec<String>>)> {
568        let mut key_vec: Vec<String> = Vec::default();
569        let mut value_vec: Vec<Vec<String>> = Vec::default();
570        for (key, index) in &self.vorbis_comment.hm {
571            key_vec.push(key.to_string());
572            value_vec.push(self.vorbis_comment.comment[*index].clone());
573        }
574        Ok((key_vec, value_vec))
575    }
576}