artistpath_core/
parsing.rs

1use crate::string_normalization::clean_str;
2use byteorder::{LittleEndian, ReadBytesExt};
3use rustc_hash::FxHashMap;
4use std::{
5    io::{Cursor, Read},
6    path::Path,
7};
8use uuid::Uuid;
9
10pub struct Artist {
11    pub id: Uuid,
12    pub name: String,
13    pub url: String,
14}
15
/// Byte offsets of the four sections inside the unified metadata file.
///
/// Each offset is the index into the file's bytes at which the corresponding
/// section parser starts reading.
struct SectionOffsets {
    /// Start of the name lookup section.
    lookup: usize,
    /// Start of the artist metadata section.
    metadata: usize,
    /// Start of the forward graph index section.
    forward_index: usize,
    /// Start of the reverse graph index section.
    reverse_index: usize,
}
22
23type NameLookup = FxHashMap<String, Vec<Uuid>>;
24type ArtistMetadata = FxHashMap<Uuid, Artist>;
25type GraphIndex = FxHashMap<Uuid, u64>;
26
27pub fn parse_unified_metadata(metadata_path: &Path) -> (NameLookup, ArtistMetadata, GraphIndex, GraphIndex) {
28    let binary_data = read_binary_file(metadata_path);
29    let section_offsets = read_section_offsets(&binary_data);
30    
31    let name_lookup = parse_name_lookup_section(&binary_data, section_offsets.lookup);
32    let artist_metadata = parse_artist_metadata_section(&binary_data, section_offsets.metadata);
33    let forward_index = parse_graph_index_section(&binary_data, section_offsets.forward_index);
34    let reverse_index = parse_graph_index_section(&binary_data, section_offsets.reverse_index);
35    
36    (name_lookup, artist_metadata, forward_index, reverse_index)
37}
38
/// Reads the whole file at `file_path` into memory.
///
/// # Panics
///
/// Panics if the file cannot be read. The panic message names the offending
/// path and the underlying I/O error so a misconfigured location is easy to spot.
fn read_binary_file(file_path: &Path) -> Vec<u8> {
    std::fs::read(file_path).unwrap_or_else(|error| {
        panic!(
            "Should be able to read metadata binary file '{}': {}",
            file_path.display(),
            error
        )
    })
}
42
43fn read_section_offsets(data: &[u8]) -> SectionOffsets {
44    let mut cursor = Cursor::new(data);
45    
46    SectionOffsets {
47        lookup: cursor.read_u32::<LittleEndian>().expect("Should read lookup offset") as usize,
48        metadata: cursor.read_u32::<LittleEndian>().expect("Should read metadata offset") as usize,
49        forward_index: cursor.read_u32::<LittleEndian>().expect("Should read forward index offset") as usize,
50        reverse_index: cursor.read_u32::<LittleEndian>().expect("Should read reverse index offset") as usize,
51    }
52}
53
54fn parse_name_lookup_section(data: &[u8], offset: usize) -> NameLookup {
55    let mut cursor = Cursor::new(&data[offset..]);
56    let entry_count = cursor.read_u32::<LittleEndian>().expect("Should read lookup count") as usize;
57    let mut name_lookup = FxHashMap::with_capacity_and_hasher(entry_count, Default::default());
58    
59    for _ in 0..entry_count {
60        let clean_name = read_length_prefixed_string(&mut cursor);
61        let uuid_count = cursor.read_u16::<LittleEndian>().expect("Should read UUID count") as usize;
62        let mut uuids = Vec::with_capacity(uuid_count);
63        for _ in 0..uuid_count {
64            uuids.push(read_uuid(&mut cursor));
65        }
66        name_lookup.insert(clean_name, uuids);
67    }
68    
69    name_lookup
70}
71
72fn parse_artist_metadata_section(data: &[u8], offset: usize) -> ArtistMetadata {
73    let mut cursor = Cursor::new(&data[offset..]);
74    let entry_count = cursor.read_u32::<LittleEndian>().expect("Should read metadata count") as usize;
75    let mut artist_metadata = FxHashMap::with_capacity_and_hasher(entry_count, Default::default());
76    
77    for _ in 0..entry_count {
78        let artist_uuid = read_uuid(&mut cursor);
79        let artist_name = read_length_prefixed_string(&mut cursor);
80        let artist_url = read_length_prefixed_string(&mut cursor);
81        
82        let artist = Artist {
83            id: artist_uuid,
84            name: artist_name,
85            url: artist_url,
86        };
87        
88        artist_metadata.insert(artist_uuid, artist);
89    }
90    
91    artist_metadata
92}
93
94fn parse_graph_index_section(data: &[u8], offset: usize) -> GraphIndex {
95    let mut cursor = Cursor::new(&data[offset..]);
96    let entry_count = cursor.read_u32::<LittleEndian>().expect("Should read index count") as usize;
97    let mut graph_index = FxHashMap::with_capacity_and_hasher(entry_count, Default::default());
98    
99    for _ in 0..entry_count {
100        let artist_uuid = read_uuid(&mut cursor);
101        let file_position = cursor.read_u64::<LittleEndian>().expect("Should read position");
102        graph_index.insert(artist_uuid, file_position);
103    }
104    
105    graph_index
106}
107
/// Reads one length-prefixed string from `cursor` and advances past it.
///
/// The encoding is a little-endian `u16` byte length followed by that many
/// bytes of UTF-8 text.
///
/// # Panics
///
/// Panics if the length prefix or the payload is truncated, or if the payload
/// is not valid UTF-8.
fn read_length_prefixed_string(cursor: &mut Cursor<&[u8]>) -> String {
    // Decode the two-byte little-endian prefix by hand.
    let mut length_prefix = [0u8; 2];
    cursor
        .read_exact(&mut length_prefix)
        .expect("Should read string length");
    let byte_len = usize::from(u16::from_le_bytes(length_prefix));

    let mut payload = vec![0u8; byte_len];
    cursor
        .read_exact(&mut payload)
        .expect("Should read string bytes");

    String::from_utf8(payload).expect("Should parse string as UTF-8")
}
114
115fn read_uuid(cursor: &mut Cursor<&[u8]>) -> Uuid {
116    let mut uuid_bytes = [0u8; 16];
117    cursor.read_exact(&mut uuid_bytes).expect("Should read UUID bytes");
118    Uuid::from_bytes(uuid_bytes)
119}
120
121pub fn find_artist_id(name: &str, lookup: &FxHashMap<String, Vec<Uuid>>) -> Result<Uuid, String> {
122    let clean_name = clean_str(name);
123    lookup
124        .get(&clean_name)
125        .and_then(|uuids| uuids.first().copied())
126        .ok_or_else(|| format!("Artist '{}' not found in database", name))
127}