1use crate::types::*;
8use crate::network::*;
9use crate::error::UGError;
10use regex::Regex;
11use std::str::FromStr;
12
// Markers and patterns used to pull tab data out of the fetched page source.
//
// * END_OF_CHORDS_DELIM / START_OF_CHORDS_DELIM: `get_tab_lines` keeps the text that
//   sits after the start marker and before the end marker; `validate_html` rejects
//   pages that are missing either marker.
// * HTML_BLACKLIST: `validate_html` rejects any page containing one of these fragments.
// * VALID_LINK_REGEX: pattern a tab link has to match in `validate_link`.
// * METADATA_REGEX: capture groups 1-4 are read by `extract_metadata` as capo,
//   tonality, tuning name and tuning.
// * BASIC_DATA_REGEX: capture groups 1-5 are read by `get_basic_metadata` as tab id,
//   song id, song name, artist name and type.
//
// NOTE(review): the double quotes inside several of these literals look unescaped as
// shown here - confirm the intended escaping against the real page source.
13const END_OF_CHORDS_DELIM: &str = "","revision_id":";
14const START_OF_CHORDS_DELIM: &str = "":{"wiki_tab":{"content":"";
15const HTML_BLACKLIST: [&str; 1] = [""type":"Video""];
16const VALID_LINK_REGEX: &str = r"http[s]*:\/\/[www.]*[tabs.]*ultimate-guitar.com\/tab\/[\S]+";
17const METADATA_REGEX: &str = r""adsupp_binary_blocked":null,"meta":\{["capo":]*(\d*)[,]*"[tonality":"]*(\w*)[","]*tuning":\{"name":"([^:]*)","value":"([^:]*)",";
18const BASIC_DATA_REGEX: &str = r"tab":\{"id":(\d+),"song_id":(\d+),"song_name":"([^:]+)","artist_id":\d+,"artist_name":"([^:]+)","type":"([\w\s]+)","part":";
19
20pub fn get_song_data(url: &str, replace_german_names: bool) -> Result<Song, Box<dyn std::error::Error>> {
43 let raw_html: String;
44 match get_raw_html(url) {
45 Ok(s) => raw_html = s,
46 Err(e) => return Err(e.into()),
47 }
48 let song_lines: Vec<Line> = get_tab_lines(&raw_html, replace_german_names)?;
49 let song_metadata: Option<SongMetaData>;
50 let basic_song_data: BasicSongData;
51 match get_basic_metadata(&raw_html, url) {
52 Ok(d) => {
53 song_metadata = extract_metadata(&raw_html);
54 basic_song_data = d;
55 }
56 Err(e) => return Err(e.into())
57 }
58 let song: Song = Song { lines: song_lines, metadata: song_metadata, basic_data: basic_song_data };
59 Ok(song)
60}
61
62pub fn get_basic_metadata(raw_html: &str, tab_link: &str) -> Result<BasicSongData, UGError> {
92 validate_html(raw_html)?;
93 validate_link(tab_link)?;
94
95 let regex = Regex::new(BASIC_DATA_REGEX).unwrap();
96 let captures = regex.captures(raw_html);
97 if let Some(cap) = captures {
98 let song_type: DataSetType = get_data_type(&cap[5]).unwrap_or_default();
99 let tab_id = match u32::from_str(&cap[1]) {
100 Ok(i) => i,
101 Err(_e) => return Err(UGError::UnexpectedWebResultError),
102 };
103 let song_id = match u32::from_str(&cap[2]) {
104 Ok(i) => i,
105 Err(_e) => return Err(UGError::UnexpectedWebResultError),
106 };
107 let title = unescape_string(&cap[3]).to_string();
108 let artist = unescape_string(&cap[4]).to_string();
109 let song_basic_meta: BasicSongData = BasicSongData { title,
110 artist,
111 tab_link: tab_link.to_string(),
112 song_id,
113 tab_id,
114 data_type: song_type };
115 Ok(song_basic_meta)
116 } else {
117 Err(UGError::NoBasicDataMatchError)
118 }
119}
120
121pub fn get_tab_lines(raw_html: &str, replace_german_names: bool) -> Result<Vec<Line>, UGError> {
144 validate_html(raw_html)?;
145 let string_parts: Vec<&str> = raw_html.split(END_OF_CHORDS_DELIM).collect();
146 let raw_data: &str = string_parts[0].split(START_OF_CHORDS_DELIM).collect::<Vec<&str>>()[1];
147 let formatted_string_lines = unescape_string(raw_data);
148 let lines: Vec<Line> = clean_and_evaluate(formatted_string_lines.lines(), replace_german_names);
149 Ok(lines)
150}
151
152pub fn validate_link(url: &str) -> Result<(), UGError> {
156 let regex = Regex::new(VALID_LINK_REGEX).unwrap();
157 let captures = regex.captures(url);
158 match captures {
159 Some(_d) => Ok(()),
160 None => Err(UGError::InvalidURLError),
161 }
162}
163
164fn validate_html(raw_html: &str) -> Result<(), UGError> {
165 for item in HTML_BLACKLIST {
166 if raw_html.contains(item) {
167 return Err(UGError::InvalidHTMLError)
168 }
169 }
170 if !raw_html.contains(START_OF_CHORDS_DELIM) || !raw_html.contains(END_OF_CHORDS_DELIM) {
171 return Err(UGError::InvalidHTMLError)
172 }
173 Ok(())
174}
175
176fn extract_metadata(raw_html: &str) -> Option<SongMetaData> {
177 let regex = Regex::new(METADATA_REGEX).unwrap();
178 let captures = regex.captures(raw_html);
179 let mut song_metadata: SongMetaData = SongMetaData::default();
180 if let Some(cap) = captures {
181 let mut capture_options: [Option<String>; 4] = [Some(cap[1].to_string()),
182 Some(cap[2].to_string()),
183 Some(cap[3].to_string()),
184 Some(cap[4].to_string())];
185 for i in 0..4 {
186 if capture_options[i].clone().unwrap().is_empty() {
187 capture_options[i] = None;
188 }
189 match i {
190 0 => song_metadata.capo = capture_options[i].clone(),
191 1 => song_metadata.tonality = capture_options[i].clone(),
192 2 => song_metadata.tuning_name = capture_options[i].clone(),
193 3 => song_metadata.tuning = capture_options[i].clone(),
194 _ => (),
195 }
196 }
197 } else {
198 return None
199 }
200 Some(song_metadata)
201}
202
203fn clean_and_evaluate(lines: std::str::Lines<'_>, replace_german_names: bool) -> Vec<Line> {
204 let mut clean_lines: Vec<Line> = Vec::new();
205 for line in lines {
206 let mut line_type: DataType = DataType::Lyric;
207 if line.contains("[ch]") {
208 line_type = DataType::Chord;
209 }
210 let mut clean_line: String = String::from(line);
211 for key in ["[ch]", "[/ch]", "[tab]", "[/tab]"] {
212 clean_line = clean_line.replace(key, "")
213 }
214 if clean_line.contains("[") && clean_line.contains("]") && line_type != DataType::Chord {
215 line_type = DataType::SectionTitle;
216 }
217 let mut line = Line {line_type, text_data: clean_line};
218 if replace_german_names {
219 line = line.replace_german_names();
220 }
221 clean_lines.push(line);
222 }
223 clean_lines
224}
225
// NOTE(review): every test except `tab_link_validation` obtains its input through
// `get_raw_html` with real URLs, so they presumably need network access - confirm.
#[cfg(test)]
mod tests {
    use super::*;

    /// Tab lines can be extracted from known pages without an invalid-HTML error.
    #[test]
    fn get_lines_of_tab() {
        let tabs_to_get = [
            "https://tabs.ultimate-guitar.com/tab/367279",
            "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
        ];
        for tab in tabs_to_get {
            println!("Getting tab: {}", tab);
            let result = get_tab_lines(&get_raw_html(tab).unwrap(), true);
            assert!(!matches!(result, Err(UGError::InvalidHTMLError)));
        }
    }

    /// A full tab link is accepted, a link without scheme is rejected.
    #[test]
    fn tab_link_validation() {
        assert_eq!(
            validate_link("https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            Ok(())
        );
        assert_ne!(
            validate_link("tabs.ultimate-guitar.com/tab/refused/rather-be-dead-power-595658"),
            Ok(())
        );
    }

    /// The data set type reported by `get_basic_metadata` matches the page type.
    #[test]
    fn type_detection() {
        let type_detection_checks: Vec<(DataSetType, &str)> = vec![
            (DataSetType::Chords, "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549"),
            (DataSetType::Chords, "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            (DataSetType::Bass, "https://tabs.ultimate-guitar.com/tab/bloc-party/this-modern-love-bass-180218"),
            (DataSetType::Tab, "https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488"),
            (DataSetType::Ukulele, "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967"),
            (DataSetType::Drums, "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599"),
            (DataSetType::Bass, "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995"),
        ];
        for (expected_type, url) in type_detection_checks {
            println!("Testing valid url: {}", url);
            let basic_data = get_basic_metadata(&get_raw_html(url).unwrap(), url).unwrap();
            assert_eq!(basic_data.data_type, expected_type);
        }
    }

    /// `validate_html` accepts supported pages and rejects unsupported ones.
    #[test]
    fn validate_page_contents() {
        let valid_page_urls = vec![
            "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
            "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741",
            "https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488",
            "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967",
            "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599",
            "https://tabs.ultimate-guitar.com/tab/blink-182/feeling-this-bass-104175",
            "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995",
            "https://tabs.ultimate-guitar.com/tab/367279",
        ];
        for valid_page_url in valid_page_urls {
            println!("Testing valid url: {}", valid_page_url);
            let verdict = validate_html(&get_raw_html(valid_page_url).unwrap());
            assert!(!matches!(verdict, Err(UGError::InvalidHTMLError)));
        }

        let invalid_page_urls = vec![
            "https://tabs.ultimate-guitar.com/tab/refused/i-wanna-watch-the-world-burn-guitar-pro-5868920",
            "https://tabs.ultimate-guitar.com/tab/refused/rather-be-dead-power-595658",
            "https://tabs.ultimate-guitar.com/tab/the-beatles/let-it-be-video-781202",
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=RDdQw4w9WgXcQ",
        ];
        for invalid_page_url in invalid_page_urls {
            println!("Testing invalid url: {}", invalid_page_url);
            let verdict = validate_html(&get_raw_html(invalid_page_url).unwrap());
            assert!(matches!(verdict, Err(UGError::InvalidHTMLError)));
        }
    }

    /// Title, artist, song id and tab id are read correctly from known pages.
    #[test]
    fn get_basic_data() {
        // (url, title, artist, song id, tab id)
        let test_sets: Vec<(&str, &str, &str, u32, u32)> = vec![
            ("https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
             "Dont Stop Me Now", "Queen", 15591, 519549),
            ("https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741",
             "Never Gonna Give You Up", "Rick Astley", 196324, 521741),
            ("https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488",
             "Stairway To Heaven", "Led Zeppelin", 31683, 9488),
            ("https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967",
             "Wenn Es Gut Ist", "Olli Schulz", 317511, 1381967),
            ("https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599",
             "In The Air Tonight", "Phil Collins", 138587, 880599),
            ("https://tabs.ultimate-guitar.com/tab/blink-182/feeling-this-bass-104175",
             "Feeling This", "Blink-182", 54209, 104175),
            ("https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995",
             "Empty Spaces", "Pink Floyd", 17357, 147995),
            ("https://tabs.ultimate-guitar.com/tab/367279",
             "Zu Spät", "Die Ärzte", 1577513, 367279),
        ];

        for (url, title, artist, song_id, tab_id) in test_sets {
            let result = get_basic_metadata(&get_raw_html(url).unwrap(), url).unwrap();
            assert_eq!(result.title, title);
            assert_eq!(result.artist, artist);
            assert_eq!(result.song_id, song_id);
            assert_eq!(result.tab_id, tab_id);
        }
    }

    /// `extract_metadata` yields the expected metadata, or `None` for pages without any.
    #[test]
    fn get_metadata() {
        let url_metadata_sets: Vec<(Option<SongMetaData>, &str)> = vec![
            (Some(SongMetaData {
                capo: Some(String::from("3")),
                tonality: None,
                tuning_name: Some(String::from("G C E A")),
                tuning: Some(String::from("G C E A")),
            }), "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967"),
            (None, "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995"),
            (None, "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599"),
            (Some(SongMetaData {
                capo: Some(String::from("1")),
                tonality: None,
                tuning_name: Some(String::from("Standard")),
                tuning: Some(String::from("E A D G B E")),
            }), "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            (Some(SongMetaData {
                capo: None,
                tonality: Some(String::from("F")),
                tuning_name: Some(String::from("Standard")),
                tuning: Some(String::from("E A D G B E")),
            }), "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549"),
        ];
        for (expected, url) in url_metadata_sets {
            // Print the URL under test (the previous `stringify!` only echoed source text).
            println!("Testing url: {}", url);
            // Comparing the whole `Option` covers all four Some/None combinations and
            // reports both values on a mismatch.
            assert_eq!(extract_metadata(&get_raw_html(url).unwrap()), expected);
        }
    }
}