mp3_metadata/
metadata.rs

1use std::fs::File;
2use std::io::Read;
3use std::path::Path;
4use std::time::Duration;
5
6use crate::consts::{BITRATES, SAMPLING_FREQ};
7use crate::enums::{ChannelType, Copyright, Emphasis, Error, Genre, Layer, Status, Version, CRC};
8use crate::types::{AudioTag, Frame, MP3Metadata, OptionalAudioTags};
9use crate::utils::{
10    compute_duration, create_utf8_str, get_line, get_samp_line, get_text_field, get_text_fields,
11};
12use crate::utils::{get_url_field, get_url_fields};
13
14fn get_id3(i: &mut u32, buf: &[u8], meta: &mut MP3Metadata) -> Result<(), Error> {
15    let mut x = *i as usize;
16    // Get extended information
17    if buf.len() > 32 && x + 32 < buf.len() && &buf[x..x + 8] == b"APETAGEX" {
18        // APE
19        *i += 31; // skip APE header / footer
20        Ok(())
21    } else if buf.len() > 127 && x + 127 < buf.len() && &buf[x..x + 3] == b"TAG" {
22        // V1
23        if meta.tag.is_some() {
24            return Err(Error::DuplicatedIDV3);
25        }
26        if let Some(last) = meta.frames.last_mut() {
27            if *i <= last.size {
28                return Ok(());
29            }
30            last.size = *i - last.size - 1;
31        }
32        *i += 126;
33        // tag v1
34        meta.tag = Some(AudioTag {
35            title: create_utf8_str(&buf[x + 3..][..30]),
36            artist: create_utf8_str(&buf[x + 33..][..30]),
37            album: create_utf8_str(&buf[x + 63..][..30]),
38            year: create_utf8_str(&buf[x + 93..][..4])
39                .parse::<u16>()
40                .unwrap_or(0),
41            comment: create_utf8_str(&buf[x + 97..][..if buf[x + 97 + 28] != 0 { 30 } else { 28 }]),
42            genre: Genre::from(buf[x + 127]),
43        });
44        Ok(())
45    } else if buf.len() > x + 13 && &buf[x..x + 3] == b"ID3" {
46        // V2 and above
47        let maj_version = buf[x + 3];
48        let min_version = buf[x + 4];
49
50        if maj_version > 4 {
51            return Ok(());
52        }
53
54        let tag_size = ((buf[x + 9] as usize) & 0xFF)
55            | (((buf[x + 8] as usize) & 0xFF) << 7)
56            | (((buf[x + 7] as usize) & 0xFF) << 14)
57            | ((((buf[x + 6] as usize) & 0xFF) << 21) + 10);
58        let use_sync = buf[x + 5] & 0x80 != 0;
59        let has_extended_header = buf[x + 5] & 0x40 != 0;
60
61        x += 10;
62
63        if has_extended_header {
64            if x + 4 >= buf.len() {
65                *i = x as u32;
66                return Ok(());
67            }
68            let header_size = ((buf[x] as u32) << 21)
69                | ((buf[x + 1] as u32) << 14)
70                | ((buf[x + 2] as u32) << 7)
71                | buf[x + 3] as u32;
72            if header_size < 4 {
73                return Ok(());
74            }
75            x += header_size as usize - 4;
76        }
77
78        *i = x as u32 + tag_size as u32;
79        if x + tag_size >= buf.len() {
80            return Ok(());
81        }
82
83        // Recreate the tag if desynchronization is used inside; we need to replace
84        // 0xFF 0x00 with 0xFF
85        let mut v = Vec::new();
86        let (buf, length) = if use_sync {
87            let mut new_pos = 0;
88            let mut skip = false;
89            v.reserve(tag_size);
90
91            for i in 0..tag_size {
92                if skip {
93                    skip = false;
94                    continue;
95                }
96                if i + 1 >= buf.len() {
97                    return Ok(());
98                }
99                if i + 1 < tag_size && buf[i] == 0xFF && buf[i + 1] == 0 {
100                    if let Some(elem) = v.get_mut(new_pos) {
101                        *elem = 0xFF;
102                    } else {
103                        return Err(Error::InvalidData);
104                    }
105                    new_pos += 1;
106                    skip = true;
107                    continue;
108                }
109                if new_pos >= v.len() {
110                    return Ok(());
111                }
112                v[new_pos] = buf[i];
113                new_pos += 1;
114            }
115            (v.as_slice(), new_pos)
116        } else {
117            (buf, tag_size)
118        };
119
120        let mut pos = x;
121        let id3_frame_size = if maj_version < 3 { 6 } else { 10 };
122        let mut op = OptionalAudioTags::default();
123        let mut changes = false;
124        loop {
125            if pos + id3_frame_size > x + length {
126                break;
127            }
128
129            // Check if there is there a frame.
130            let c = buf[pos];
131            #[allow(clippy::manual_range_contains)]
132            if c < b'A' || c > b'Z' {
133                break;
134            }
135
136            // Frame name is 3 chars in pre-ID3v3 and 4 chars after
137            let (frame_name, frame_size) = if maj_version < 3 {
138                (
139                    &buf[pos..pos + 3],
140                    (buf[pos + 5] as u32 & 0xFF)
141                        | ((buf[pos + 4] as u32 & 0xFF) << 8)
142                        | ((buf[pos + 3] as u32 & 0xFF) << 16),
143                )
144            } else if maj_version < 4 {
145                (
146                    &buf[pos..pos + 4],
147                    (buf[pos + 7] as u32 & 0xFF)
148                        | ((buf[pos + 6] as u32 & 0xFF) << 8)
149                        | ((buf[pos + 5] as u32 & 0xFF) << 16)
150                        | ((buf[pos + 4] as u32 & 0xFF) << 24),
151                )
152            } else {
153                (
154                    &buf[pos..pos + 4],
155                    (buf[pos + 7] as u32 & 0xFF)
156                        | ((buf[pos + 6] as u32 & 0xFF) << 7)
157                        | ((buf[pos + 5] as u32 & 0xFF) << 14)
158                        | ((buf[pos + 4] as u32 & 0xFF) << 21),
159                )
160            };
161
162            pos += id3_frame_size;
163            if pos + frame_size as usize > x + length {
164                break;
165            }
166
167            // http://id3.org/id3v2.3.0#Declared_ID3v2_frames
168            match frame_name {
169                // -----------------------
170                // ----- TEXT FRAMES -----
171                // -----------------------
172                b"TALB" => {
173                    get_text_field(buf, pos, frame_size, &mut changes, &mut op.album_movie_show)
174                }
175                b"TBPM" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.bpm),
176                b"TCOM" => get_text_fields(buf, pos, frame_size, &mut changes, &mut op.composers),
177                b"TCON" => {
178                    let mut s = None;
179                    get_text_field(buf, pos, frame_size, &mut changes, &mut s);
180                    if let Some(s) = s {
181                        if !s.is_empty() {
182                            if s.starts_with('(') && s.ends_with(')') {
183                                let v = s
184                                    .split(')')
185                                    .collect::<Vec<&str>>()
186                                    .into_iter()
187                                    .filter_map(|a| match a.replace('(', "").parse::<u8>() {
188                                        Ok(num) => Some(Genre::from(num)),
189                                        _ => None,
190                                    })
191                                    .collect::<Vec<Genre>>();
192                                if !v.is_empty() {
193                                    for entry in v {
194                                        op.content_type.push(entry);
195                                    }
196                                } else {
197                                    op.content_type.push(Genre::from(s.as_str()));
198                                }
199                            } else {
200                                op.content_type.push(Genre::from(s.as_str()));
201                            }
202                        }
203                    }
204                }
205                b"TCOP" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.copyright),
206                b"TDAT" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.date),
207                b"TDLY" => {
208                    get_text_field(buf, pos, frame_size, &mut changes, &mut op.playlist_delay)
209                }
210                b"TENC" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.encoded_by),
211                b"TEXT" => {
212                    get_text_fields(buf, pos, frame_size, &mut changes, &mut op.text_writers)
213                }
214                b"TFLT" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.file_type),
215                b"TIME" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.time),
216                b"TIT" | b"TIT2" => {
217                    get_text_field(buf, pos, frame_size, &mut changes, &mut op.title)
218                }
219                b"TIT1" => get_text_field(
220                    buf,
221                    pos,
222                    frame_size,
223                    &mut changes,
224                    &mut op.content_group_description,
225                ),
226                b"TIT3" => get_text_field(
227                    buf,
228                    pos,
229                    frame_size,
230                    &mut changes,
231                    &mut op.subtitle_refinement_description,
232                ),
233                b"TKEY" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.initial_key),
234                b"TLAN" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.language),
235                b"TLEN" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.length),
236                b"TMED" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.media_type),
237                b"TOAL" => get_text_field(
238                    buf,
239                    pos,
240                    frame_size,
241                    &mut changes,
242                    &mut op.original_album_move_show_title,
243                ),
244                b"TOFN" => get_text_field(
245                    buf,
246                    pos,
247                    frame_size,
248                    &mut changes,
249                    &mut op.original_filename,
250                ),
251                b"TOLY" => get_text_fields(
252                    buf,
253                    pos,
254                    frame_size,
255                    &mut changes,
256                    &mut op.original_text_writers,
257                ),
258                b"TOPE" => {
259                    get_text_fields(buf, pos, frame_size, &mut changes, &mut op.original_artists)
260                }
261                b"TORY" => get_text_field(
262                    buf,
263                    pos,
264                    frame_size,
265                    &mut changes,
266                    &mut op.original_release_year,
267                ),
268                b"TOWN" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.file_owner),
269                b"TPE1" => get_text_fields(buf, pos, frame_size, &mut changes, &mut op.performers),
270                b"TPE2" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.band),
271                b"TPE3" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.conductor),
272                b"TPE4" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.interpreted),
273                b"TPOS" => {
274                    get_text_field(buf, pos, frame_size, &mut changes, &mut op.part_of_a_set)
275                }
276                b"TPUB" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.publisher),
277                b"TRCK" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.track_number),
278                b"TRDA" => {
279                    get_text_field(buf, pos, frame_size, &mut changes, &mut op.recording_dates)
280                }
281                b"TRSN" => get_text_field(
282                    buf,
283                    pos,
284                    frame_size,
285                    &mut changes,
286                    &mut op.internet_radio_station_name,
287                ),
288                b"TRSO" => get_text_field(
289                    buf,
290                    pos,
291                    frame_size,
292                    &mut changes,
293                    &mut op.internet_radio_station_owner,
294                ),
295                b"TSIZ" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.size),
296                b"TSRC" => get_text_field(
297                    buf,
298                    pos,
299                    frame_size,
300                    &mut changes,
301                    &mut op.international_standard_recording_code,
302                ),
303                b"TSSE" => get_text_field(
304                    buf,
305                    pos,
306                    frame_size,
307                    &mut changes,
308                    &mut op.soft_hard_setting,
309                ),
310                b"TYER" => get_text_field(buf, pos, frame_size, &mut changes, &mut op.year),
311                b"IPLS" => {
312                    get_text_field(buf, pos, frame_size, &mut changes, &mut op.involved_people)
313                }
314                // ----------------------
315                // ----- URL FRAMES -----
316                // ----------------------
317                b"WCOM" => get_url_fields(
318                    buf,
319                    pos,
320                    frame_size,
321                    &mut changes,
322                    &mut op.commercial_info_url,
323                ),
324                b"WCOP" => get_url_field(
325                    buf,
326                    pos,
327                    frame_size,
328                    &mut changes,
329                    &mut op.copyright_info_url,
330                ),
331                b"WOAF" => {
332                    get_url_field(buf, pos, frame_size, &mut changes, &mut op.official_webpage)
333                }
334                b"WOAR" => get_url_fields(
335                    buf,
336                    pos,
337                    frame_size,
338                    &mut changes,
339                    &mut op.official_artist_webpage,
340                ),
341                b"WOAS" => get_url_field(
342                    buf,
343                    pos,
344                    frame_size,
345                    &mut changes,
346                    &mut op.official_audio_source_webpage,
347                ),
348                b"WORS" => get_url_field(
349                    buf,
350                    pos,
351                    frame_size,
352                    &mut changes,
353                    &mut op.official_internet_radio_webpage,
354                ),
355                b"WPAY" => get_url_field(buf, pos, frame_size, &mut changes, &mut op.payment_url),
356                b"WPUB" => get_url_field(
357                    buf,
358                    pos,
359                    frame_size,
360                    &mut changes,
361                    &mut op.publishers_official_webpage,
362                ),
363                _ => {
364                    // TODO: handle other type of fields, like picture
365                }
366            };
367
368            pos += frame_size as usize;
369        }
370        if changes {
371            op.position = meta.frames.len() as u32;
372            op.minor_version = min_version;
373            op.major_version = maj_version;
374            meta.optional_info.push(op);
375        }
376        Ok(())
377    } else {
378        Ok(())
379    }
380}
381
382fn read_header(buf: &[u8], i: &mut u32, meta: &mut MP3Metadata) -> Result<bool, Error> {
383    let header = ((buf[*i as usize] as u32) << 24)
384        | ((buf[*i as usize + 1] as u32) << 16)
385        | ((buf[*i as usize + 2] as u32) << 8)
386        | (buf[*i as usize + 3] as u32);
387    if header & 0xffe00000 == 0xffe00000
388        && header & (3 << 17) != 0
389        && header & (0xf << 12) != (0xf << 12)
390        && header & (3 << 10) != (3 << 10)
391    {
392        let mut frame: Frame = Default::default();
393
394        frame.version = Version::from((header >> 19) & 3);
395        frame.layer = Layer::from((header >> 17) & 3);
396        frame.crc = CRC::from((header >> 16) & 1);
397
398        frame.bitrate =
399            BITRATES[get_line(frame.version, frame.layer)][((header >> 12) & 0xF) as usize];
400        frame.sampling_freq =
401            SAMPLING_FREQ[get_samp_line(frame.version)][((header >> 10) & 0x3) as usize];
402        frame.padding = (header >> 9) & 1 == 1;
403        frame.private_bit = (header >> 8) & 1 == 1;
404
405        frame.chan_type = ChannelType::from((header >> 6) & 3);
406        let (intensity, ms_stereo) = match (header >> 4) & 3 {
407            0x1 => (true, false),
408            0x2 => (false, true),
409            0x3 => (true, true),
410            /*0x00*/ _ => (false, false),
411        };
412        frame.intensity_stereo = intensity;
413        frame.ms_stereo = ms_stereo;
414        frame.copyright = Copyright::from((header >> 3) & 1);
415        frame.status = Status::from((header >> 2) & 1);
416        frame.emphasis = Emphasis::from(header & 0x03);
417        frame.duration = compute_duration(frame.version, frame.layer, frame.sampling_freq);
418        frame.position = meta.duration;
419        frame.offset = *i;
420
421        if let Some(dur) = frame.duration {
422            meta.duration += dur;
423        }
424        /*frame.size = if frame.layer == Layer::Layer1 && frame.sampling_freq > 0 {
425            /*println!("{:4}: (12000 * {} / {} + {}) * 4 = {}", i, frame.bitrate as u64, frame.sampling_freq as u64,
426                if frame.slot { 1 } else { 0 },
427                    (12000 * frame.bitrate as u64 / frame.sampling_freq as u64 +
428                if frame.slot { 1 } else { 0 }) * 4);*/
429
430            (12000 * frame.bitrate as u64 / frame.sampling_freq as u64 +
431                if frame.slot { 1 } else { 0 }) * 4
432        } else if (frame.layer == Layer::Layer2 || frame.layer == Layer::Layer3) && frame.sampling_freq > 0 {
433            /*println!("{:4}: 144000 * {} / {} + {} = {}", i, frame.bitrate as u64, frame.sampling_freq as u64,
434                if frame.slot { 1 } else { 0 },
435                    144000 * frame.bitrate as u64 / frame.sampling_freq as u64 +
436                if frame.slot { 1 } else { 0 });*/
437
438            144000 * frame.bitrate as u64 / frame.sampling_freq as u64 +
439                if frame.slot { 1 } else { 0 }
440        } else {
441            continue 'a;
442        } as u32;*/
443        let samples_per_frame = match frame.layer {
444            Layer::Layer3 => {
445                if frame.version == Version::MPEG1 {
446                    1152
447                } else {
448                    576
449                }
450            }
451            Layer::Layer2 => 1152,
452            Layer::Layer1 => 384,
453            _ => unreachable!(),
454        };
455        frame.size = (samples_per_frame as u64 / 8 * frame.bitrate as u64 * 1000
456            / frame.sampling_freq as u64) as u32;
457        if frame.size < 1 {
458            return Ok(false);
459        }
460        if frame.padding {
461            frame.size += 1;
462        }
463        *i += frame.size;
464        meta.frames.push(frame);
465        Ok(true)
466    } else {
467        Ok(false)
468    }
469}
470
471pub fn read_from_file<P>(file: P) -> Result<MP3Metadata, Error>
472where
473    P: AsRef<Path>,
474{
475    if let Ok(mut fd) = File::open(file) {
476        let mut buf = Vec::new();
477
478        match fd.read_to_end(&mut buf) {
479            Ok(_) => read_from_slice(&buf),
480            Err(_) => Err(Error::FileError),
481        }
482    } else {
483        Err(Error::FileError)
484    }
485}
486
487pub fn read_from_slice(buf: &[u8]) -> Result<MP3Metadata, Error> {
488    let mut meta = MP3Metadata {
489        frames: Vec::new(),
490        duration: Duration::new(0, 0),
491        tag: None,
492        optional_info: Vec::new(),
493    };
494    let mut i = 0u32;
495
496    'a: while i < buf.len() as u32 {
497        loop {
498            get_id3(&mut i, buf, &mut meta)?;
499            if i + 3 >= buf.len() as u32 {
500                break 'a;
501            }
502            match read_header(buf, &mut i, &mut meta) {
503                Ok(true) => continue 'a,
504                Err(e) => return Err(e),
505                _ => {}
506            }
507            let old_i = i;
508            get_id3(&mut i, buf, &mut meta)?;
509            if i == old_i {
510                i += 1;
511            }
512            if i >= buf.len() as u32 {
513                break 'a;
514            }
515        }
516    }
517    if meta.tag.is_none() {
518        if let Some(last) = meta.frames.last_mut() {
519            if i <= last.size {
520                return Err(Error::InvalidData);
521            }
522        }
523    }
524    if meta.frames.is_empty() {
525        Err(Error::NotMP3)
526    } else {
527        Ok(meta)
528    }
529}
530
#[cfg(test)]
mod tests {
    use super::*;

    /// A Rust source file holds no audio frame, so it must be rejected as `NotMP3`.
    #[test]
    fn not_mp3() {
        if let Err(e) = read_from_file("src/lib.rs") {
            assert_eq!(e, Error::NotMP3);
        } else {
            panic!("Wasn't supposed to be ok!");
        }
    }

    /// The `double_id.mp3` asset must be rejected with `DuplicatedIDV3`.
    #[test]
    fn double_id() {
        if let Err(e) = read_from_file("assets/double_id.mp3") {
            assert_eq!(e, Error::DuplicatedIDV3);
        } else {
            panic!("Wasn't supposed to be ok!");
        }
    }

    /// This byte sequence must produce an error rather than metadata.
    #[test]
    fn wrong_data() {
        let data: &[u8] = &[
            255, 0, 0, 16, 0, 12, 0, 5, 43, 51, 61, 61, 90, 0, 0, 50, 5, 255, 239, 32, 61, 61, 61,
            61, 61, 61, 92, 61, 65, 51, 255, 230, 255, 5, 61, 61, 5, 255, 255, 5, 43, 51, 61, 61,
            5, 255, 255, 5, 169, 169, 73, 68, 51, 0, 0, 187, 0, 0, 0, 0, 0, 0, 0, 50, 5, 255, 255,
            5, 169, 169, 73, 68, 51, 0, 0, 187, 0, 0, 0, 0, 0, 0, 0, 0, 51, 180, 255, 0, 0, 51, 5,
            255, 252, 5, 43, 51, 51, 0, 1, 32, 31, 0, 0, 51, 51, 148, 255, 255, 16, 51, 51, 53,
            250, 0, 1, 61, 61, 61, 0, 51, 180, 255, 0, 0, 51, 5, 255, 252, 5, 43, 51, 51, 0, 1, 32,
            31, 0, 0, 51, 5, 255, 255, 5, 169, 169, 73, 68, 51, 0, 0, 187, 0, 0, 0, 0, 0, 0, 0, 50,
            5, 255, 255, 5, 169, 169, 73, 68, 51, 0, 0, 187, 0, 0, 0, 0, 0, 0, 0, 0, 51, 180, 255,
            0, 0, 51, 5, 255, 252, 5, 43, 51, 148, 255, 255, 16,
        ];
        assert!(read_from_slice(data).is_err());
    }
}