artistpath_core/
parsing.rs1use crate::string_normalization::clean_str;
2use byteorder::{LittleEndian, ReadBytesExt};
3use rustc_hash::FxHashMap;
4use std::{
5 io::{Cursor, Read},
6 path::Path,
7};
8use uuid::Uuid;
9
10pub struct Artist {
11 pub id: Uuid,
12 pub name: String,
13 pub url: String,
14}
15
/// Byte offsets of the four sections inside the unified metadata file.
///
/// Each value is an index into the file's byte buffer at which the
/// corresponding section starts.
struct SectionOffsets {
    /// Start of the name-lookup section.
    lookup: usize,
    /// Start of the artist-metadata section.
    metadata: usize,
    /// Start of the forward graph-index section.
    forward_index: usize,
    /// Start of the reverse graph-index section.
    reverse_index: usize,
}
22
23type NameLookup = FxHashMap<String, Vec<Uuid>>;
24type ArtistMetadata = FxHashMap<Uuid, Artist>;
25type GraphIndex = FxHashMap<Uuid, u64>;
26
27pub fn parse_unified_metadata(metadata_path: &Path) -> (NameLookup, ArtistMetadata, GraphIndex, GraphIndex) {
28 let binary_data = read_binary_file(metadata_path);
29 let section_offsets = read_section_offsets(&binary_data);
30
31 let name_lookup = parse_name_lookup_section(&binary_data, section_offsets.lookup);
32 let artist_metadata = parse_artist_metadata_section(&binary_data, section_offsets.metadata);
33 let forward_index = parse_graph_index_section(&binary_data, section_offsets.forward_index);
34 let reverse_index = parse_graph_index_section(&binary_data, section_offsets.reverse_index);
35
36 (name_lookup, artist_metadata, forward_index, reverse_index)
37}
38
/// Reads the whole file at `file_path` into memory.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn read_binary_file(file_path: &Path) -> Vec<u8> {
    let contents = std::fs::read(file_path);
    contents.expect("Should be able to read metadata binary file")
}
42
43fn read_section_offsets(data: &[u8]) -> SectionOffsets {
44 let mut cursor = Cursor::new(data);
45
46 SectionOffsets {
47 lookup: cursor.read_u32::<LittleEndian>().expect("Should read lookup offset") as usize,
48 metadata: cursor.read_u32::<LittleEndian>().expect("Should read metadata offset") as usize,
49 forward_index: cursor.read_u32::<LittleEndian>().expect("Should read forward index offset") as usize,
50 reverse_index: cursor.read_u32::<LittleEndian>().expect("Should read reverse index offset") as usize,
51 }
52}
53
54fn parse_name_lookup_section(data: &[u8], offset: usize) -> NameLookup {
55 let mut cursor = Cursor::new(&data[offset..]);
56 let entry_count = cursor.read_u32::<LittleEndian>().expect("Should read lookup count") as usize;
57 let mut name_lookup = FxHashMap::with_capacity_and_hasher(entry_count, Default::default());
58
59 for _ in 0..entry_count {
60 let clean_name = read_length_prefixed_string(&mut cursor);
61 let uuid_count = cursor.read_u16::<LittleEndian>().expect("Should read UUID count") as usize;
62 let mut uuids = Vec::with_capacity(uuid_count);
63 for _ in 0..uuid_count {
64 uuids.push(read_uuid(&mut cursor));
65 }
66 name_lookup.insert(clean_name, uuids);
67 }
68
69 name_lookup
70}
71
72fn parse_artist_metadata_section(data: &[u8], offset: usize) -> ArtistMetadata {
73 let mut cursor = Cursor::new(&data[offset..]);
74 let entry_count = cursor.read_u32::<LittleEndian>().expect("Should read metadata count") as usize;
75 let mut artist_metadata = FxHashMap::with_capacity_and_hasher(entry_count, Default::default());
76
77 for _ in 0..entry_count {
78 let artist_uuid = read_uuid(&mut cursor);
79 let artist_name = read_length_prefixed_string(&mut cursor);
80 let artist_url = read_length_prefixed_string(&mut cursor);
81
82 let artist = Artist {
83 id: artist_uuid,
84 name: artist_name,
85 url: artist_url,
86 };
87
88 artist_metadata.insert(artist_uuid, artist);
89 }
90
91 artist_metadata
92}
93
94fn parse_graph_index_section(data: &[u8], offset: usize) -> GraphIndex {
95 let mut cursor = Cursor::new(&data[offset..]);
96 let entry_count = cursor.read_u32::<LittleEndian>().expect("Should read index count") as usize;
97 let mut graph_index = FxHashMap::with_capacity_and_hasher(entry_count, Default::default());
98
99 for _ in 0..entry_count {
100 let artist_uuid = read_uuid(&mut cursor);
101 let file_position = cursor.read_u64::<LittleEndian>().expect("Should read position");
102 graph_index.insert(artist_uuid, file_position);
103 }
104
105 graph_index
106}
107
108fn read_length_prefixed_string(cursor: &mut Cursor<&[u8]>) -> String {
109 let string_length = cursor.read_u16::<LittleEndian>().expect("Should read string length") as usize;
110 let mut string_bytes = vec![0u8; string_length];
111 cursor.read_exact(&mut string_bytes).expect("Should read string bytes");
112 String::from_utf8(string_bytes).expect("Should parse string as UTF-8")
113}
114
115fn read_uuid(cursor: &mut Cursor<&[u8]>) -> Uuid {
116 let mut uuid_bytes = [0u8; 16];
117 cursor.read_exact(&mut uuid_bytes).expect("Should read UUID bytes");
118 Uuid::from_bytes(uuid_bytes)
119}
120
121pub fn find_artist_id(name: &str, lookup: &FxHashMap<String, Vec<Uuid>>) -> Result<Uuid, String> {
122 let clean_name = clean_str(name);
123 lookup
124 .get(&clean_name)
125 .and_then(|uuids| uuids.first().copied())
126 .ok_or_else(|| format!("Artist '{}' not found in database", name))
127}