1use crate::{Album, AlbumPage, Artist, ArtistPage, LastFmError, Result, Track, TrackPage};
8use scraper::{Html, Selector};
9
/// Stateless parser that turns `scraper` HTML documents into the crate's
/// `Track`, `Album` and `Artist` values (and their page wrappers).
///
/// The type carries no fields; every method works purely on its arguments.
#[derive(Clone, Debug)]
pub struct LastFmParser;
16
17impl LastFmParser {
18 pub fn new() -> Self {
20 Self
21 }
22
23 pub fn parse_recent_scrobbles(&self, document: &Html) -> Result<Vec<Track>> {
26 let mut tracks = Vec::new();
27
28 let table_selector = Selector::parse("table.chartlist").unwrap();
30 let row_selector = Selector::parse("tbody tr").unwrap();
31
32 let tables: Vec<_> = document.select(&table_selector).collect();
33 log::debug!("Found {} chartlist tables", tables.len());
34
35 for table in tables {
36 for row in table.select(&row_selector) {
37 if let Ok(track) = self.parse_recent_scrobble_row(&row) {
38 tracks.push(track);
39 }
40 }
41 }
42
43 if tracks.is_empty() {
44 log::debug!("No tracks found in recent scrobbles");
45 }
46
47 log::debug!("Parsed {} recent scrobbles", tracks.len());
48 Ok(tracks)
49 }
50
51 fn parse_recent_scrobble_row(&self, row: &scraper::ElementRef) -> Result<Track> {
53 let name_selector = Selector::parse(".chartlist-name a").unwrap();
55 let name = row
56 .select(&name_selector)
57 .next()
58 .ok_or(LastFmError::Parse("Missing track name".to_string()))?
59 .text()
60 .collect::<String>()
61 .trim()
62 .to_string();
63
64 let artist_selector = Selector::parse(".chartlist-artist a").unwrap();
66 let artist = row
67 .select(&artist_selector)
68 .next()
69 .ok_or(LastFmError::Parse("Missing artist name".to_string()))?
70 .text()
71 .collect::<String>()
72 .trim()
73 .to_string();
74
75 let timestamp = self.extract_scrobble_timestamp(row);
77
78 let album = self.extract_scrobble_album(row);
80
81 let album_artist = self.extract_scrobble_album_artist(row);
83
84 let playcount = 1;
86
87 Ok(Track {
88 name,
89 artist,
90 playcount,
91 timestamp,
92 album,
93 album_artist,
94 })
95 }
96
97 fn extract_scrobble_timestamp(&self, row: &scraper::ElementRef) -> Option<u64> {
99 if let Some(timestamp_str) = row.value().attr("data-timestamp") {
103 if let Ok(timestamp) = timestamp_str.parse::<u64>() {
104 return Some(timestamp);
105 }
106 }
107
108 let timestamp_input_selector = Selector::parse("input[name='timestamp']").unwrap();
110 if let Some(input) = row.select(×tamp_input_selector).next() {
111 if let Some(value) = input.value().attr("value") {
112 if let Ok(timestamp) = value.parse::<u64>() {
113 return Some(timestamp);
114 }
115 }
116 }
117
118 let edit_form_selector =
120 Selector::parse("form[data-edit-scrobble] input[name='timestamp']").unwrap();
121 if let Some(timestamp_input) = row.select(&edit_form_selector).next() {
122 if let Some(value) = timestamp_input.value().attr("value") {
123 if let Ok(timestamp) = value.parse::<u64>() {
124 return Some(timestamp);
125 }
126 }
127 }
128
129 None
132 }
133
134 fn extract_scrobble_album(&self, row: &scraper::ElementRef) -> Option<String> {
136 let album_input_selector =
138 Selector::parse("form[data-edit-scrobble] input[name='album_name']").unwrap();
139
140 if let Some(album_input) = row.select(&album_input_selector).next() {
141 if let Some(album_name) = album_input.value().attr("value") {
142 if !album_name.is_empty() {
143 return Some(album_name.to_string());
144 }
145 }
146 }
147
148 None
149 }
150
151 fn extract_scrobble_album_artist(&self, row: &scraper::ElementRef) -> Option<String> {
153 let album_artist_input_selector =
155 Selector::parse("form[data-edit-scrobble] input[name='album_artist_name']").unwrap();
156
157 if let Some(album_artist_input) = row.select(&album_artist_input_selector).next() {
158 if let Some(album_artist_name) = album_artist_input.value().attr("value") {
159 if !album_artist_name.is_empty() {
160 return Some(album_artist_name.to_string());
161 }
162 }
163 }
164
165 None
166 }
167
168 pub fn parse_tracks_page(
170 &self,
171 document: &Html,
172 page_number: u32,
173 artist: &str,
174 album: Option<&str>,
175 ) -> Result<TrackPage> {
176 let tracks = self.extract_tracks_from_document(document, artist, album)?;
177
178 let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
180
181 Ok(TrackPage {
182 tracks,
183 page_number,
184 has_next_page,
185 total_pages,
186 })
187 }
188
189 pub fn extract_tracks_from_document(
191 &self,
192 document: &Html,
193 artist: &str,
194 album: Option<&str>,
195 ) -> Result<Vec<Track>> {
196 let mut tracks = Vec::new();
197 let mut seen_tracks = std::collections::HashSet::new();
198
199 log::debug!("Starting track extraction for artist: {artist}, album: {album:?}");
200
201 let track_selector = Selector::parse("[data-track-name]").unwrap();
205 let track_elements: Vec<_> = document.select(&track_selector).collect();
206 log::debug!(
207 "Found {} elements with data-track-name",
208 track_elements.len()
209 );
210
211 for element in track_elements {
212 let track_name = element.value().attr("data-track-name").unwrap_or("");
213 if track_name.is_empty() {
214 continue;
215 }
216 if seen_tracks.contains(track_name) {
217 continue;
218 }
219 seen_tracks.insert(track_name.to_string());
220
221 match self.find_playcount_for_track(document, track_name) {
222 Ok(playcount) => {
223 let timestamp = self.find_timestamp_for_track(document, track_name);
224 let track = Track {
225 name: track_name.to_string(),
226 artist: artist.to_string(),
227 playcount,
228 timestamp,
229 album: album.map(|a| a.to_string()),
230 album_artist: None, };
232 tracks.push(track);
233 log::debug!("Added track '{track_name}' with {playcount} plays");
234 }
235 Err(e) => {
236 log::debug!("FAILED to find playcount for track '{track_name}': {e}");
237 }
238 }
239 }
240
241 let table_selector = Selector::parse("table.chartlist").unwrap();
243 let tables: Vec<_> = document.select(&table_selector).collect();
244
245 for table in tables {
246 let row_selector = Selector::parse("tbody tr").unwrap();
247 let rows: Vec<_> = table.select(&row_selector).collect();
248
249 for row in rows.iter() {
250 if let Ok(mut track) = self.parse_track_row(row) {
252 track.artist = artist.to_string();
253 if let Some(album_name) = album {
254 track.album = Some(album_name.to_string());
255 }
256
257 if !seen_tracks.contains(&track.name) {
259 seen_tracks.insert(track.name.clone());
260 tracks.push(track);
261 }
262 }
263 }
264 }
265
266 log::debug!("Successfully extracted {} unique tracks", tracks.len());
267 Ok(tracks)
268 }
269
270 pub fn parse_track_row(&self, row: &scraper::ElementRef) -> Result<Track> {
274 let name = self.extract_name_from_row(row, "track")?;
276
277 let playcount = self.extract_playcount_from_row(row);
279
280 let artist = "".to_string(); Ok(Track {
283 name,
284 artist,
285 playcount,
286 timestamp: None, album: None, album_artist: None, })
290 }
291
292 pub fn parse_albums_page(
294 &self,
295 document: &Html,
296 page_number: u32,
297 artist: &str,
298 ) -> Result<AlbumPage> {
299 let mut albums = Vec::new();
300
301 let album_selector = Selector::parse("[data-album-name]").unwrap();
303 let album_elements: Vec<_> = document.select(&album_selector).collect();
304
305 if !album_elements.is_empty() {
306 log::debug!(
307 "Found {} album elements with data-album-name",
308 album_elements.len()
309 );
310
311 let mut seen_albums = std::collections::HashSet::new();
313
314 for element in album_elements {
315 let album_name = element.value().attr("data-album-name").unwrap_or("");
316 if !album_name.is_empty() && !seen_albums.contains(album_name) {
317 seen_albums.insert(album_name.to_string());
318
319 if let Ok(playcount) = self.find_playcount_for_album(document, album_name) {
320 let timestamp = self.find_timestamp_for_album(document, album_name);
321 let album = Album {
322 name: album_name.to_string(),
323 artist: artist.to_string(),
324 playcount,
325 timestamp,
326 };
327 albums.push(album);
328 }
329 }
330 }
331 } else {
332 albums = self.parse_albums_from_rows(document, artist)?;
334 }
335
336 let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
337
338 Ok(AlbumPage {
339 albums,
340 page_number,
341 has_next_page,
342 total_pages,
343 })
344 }
345
346 fn parse_albums_from_rows(&self, document: &Html, artist: &str) -> Result<Vec<Album>> {
348 let mut albums = Vec::new();
349 let table_selector = Selector::parse("table.chartlist").unwrap();
350 let row_selector = Selector::parse("tbody tr").unwrap();
351
352 for table in document.select(&table_selector) {
353 for row in table.select(&row_selector) {
354 if let Ok(mut album) = self.parse_album_row(&row) {
355 album.artist = artist.to_string();
356 albums.push(album);
357 }
358 }
359 }
360 Ok(albums)
361 }
362
363 pub fn parse_album_row(&self, row: &scraper::ElementRef) -> Result<Album> {
365 let name = self.extract_name_from_row(row, "album")?;
367
368 let playcount = self.extract_playcount_from_row(row);
370
371 let artist = "".to_string(); Ok(Album {
374 name,
375 artist,
376 playcount,
377 timestamp: None, })
379 }
380
381 pub fn parse_track_search_results(&self, document: &Html) -> Result<Vec<Track>> {
388 let mut tracks = Vec::new();
389
390 let table_selector = Selector::parse("table.chartlist").unwrap();
392 let row_selector = Selector::parse("tbody tr").unwrap();
393
394 let tables: Vec<_> = document.select(&table_selector).collect();
395 log::debug!("Found {} chartlist tables in search results", tables.len());
396
397 for table in tables {
398 for row in table.select(&row_selector) {
399 if let Ok(track) = self.parse_search_track_row(&row) {
400 tracks.push(track);
401 }
402 }
403 }
404
405 log::debug!("Parsed {} tracks from search results", tracks.len());
406 Ok(tracks)
407 }
408
409 pub fn parse_album_search_results(&self, document: &Html) -> Result<Vec<Album>> {
414 let mut albums = Vec::new();
415
416 let table_selector = Selector::parse("table.chartlist").unwrap();
418 let row_selector = Selector::parse("tbody tr").unwrap();
419
420 let tables: Vec<_> = document.select(&table_selector).collect();
421 log::debug!(
422 "Found {} chartlist tables in album search results",
423 tables.len()
424 );
425
426 for table in tables {
427 for row in table.select(&row_selector) {
428 if let Ok(album) = self.parse_search_album_row(&row) {
429 albums.push(album);
430 }
431 }
432 }
433
434 log::debug!("Parsed {} albums from search results", albums.len());
435 Ok(albums)
436 }
437
438 pub fn parse_artist_search_results(&self, document: &Html) -> Result<Vec<Artist>> {
443 let mut artists = Vec::new();
444
445 let table_selector = Selector::parse("table.chartlist").unwrap();
447 let row_selector = Selector::parse("tbody tr").unwrap();
448
449 let tables: Vec<_> = document.select(&table_selector).collect();
450 log::debug!(
451 "Found {} chartlist tables in artist search results",
452 tables.len()
453 );
454
455 for table in tables {
456 for row in table.select(&row_selector) {
457 if let Ok(artist) = self.parse_search_artist_row(&row) {
458 artists.push(artist);
459 }
460 }
461 }
462
463 log::debug!("Parsed {} artists from search results", artists.len());
464 Ok(artists)
465 }
466
467 fn parse_search_artist_row(&self, row: &scraper::ElementRef) -> Result<Artist> {
469 let name_selector = Selector::parse("td.chartlist-name a").unwrap();
471 let name = row
472 .select(&name_selector)
473 .next()
474 .ok_or(LastFmError::Parse(
475 "Missing artist name in search results".to_string(),
476 ))?
477 .text()
478 .collect::<String>()
479 .trim()
480 .to_string();
481
482 let playcount = self.extract_playcount_from_row(row);
484
485 Ok(Artist {
486 name,
487 playcount,
488 timestamp: None, })
490 }
491
492 fn parse_search_track_row(&self, row: &scraper::ElementRef) -> Result<Track> {
494 let name = self.extract_name_from_row(row, "track")?;
496
497 let artist_selector = Selector::parse(".chartlist-artist a").unwrap();
499 let artist = row
500 .select(&artist_selector)
501 .next()
502 .map(|el| el.text().collect::<String>().trim().to_string())
503 .ok_or_else(|| {
504 LastFmError::Parse("Missing artist name in search results".to_string())
505 })?;
506
507 let playcount = self.extract_playcount_from_row(row);
509
510 let timestamp = None;
512
513 let album = self.extract_album_from_search_row(row);
515 let album_artist = self.extract_album_artist_from_search_row(row);
516
517 Ok(Track {
518 name,
519 artist,
520 playcount,
521 timestamp,
522 album,
523 album_artist,
524 })
525 }
526
527 fn parse_search_album_row(&self, row: &scraper::ElementRef) -> Result<Album> {
529 let name = self.extract_name_from_row(row, "album")?;
531
532 let artist_selector = Selector::parse(".chartlist-artist a").unwrap();
534 let artist = row
535 .select(&artist_selector)
536 .next()
537 .map(|el| el.text().collect::<String>().trim().to_string())
538 .ok_or_else(|| {
539 LastFmError::Parse("Missing artist name in album search results".to_string())
540 })?;
541
542 let playcount = self.extract_playcount_from_row(row);
544
545 Ok(Album {
546 name,
547 artist,
548 playcount,
549 timestamp: None, })
551 }
552
553 fn extract_album_from_search_row(&self, row: &scraper::ElementRef) -> Option<String> {
555 let album_input_selector = Selector::parse("input[name='album']").unwrap();
557 if let Some(input) = row.select(&album_input_selector).next() {
558 if let Some(value) = input.value().attr("value") {
559 let album = value.trim().to_string();
560 if !album.is_empty() {
561 return Some(album);
562 }
563 }
564 }
565 None
566 }
567
568 fn extract_album_artist_from_search_row(&self, row: &scraper::ElementRef) -> Option<String> {
570 let album_artist_input_selector = Selector::parse("input[name='album_artist']").unwrap();
572 if let Some(input) = row.select(&album_artist_input_selector).next() {
573 if let Some(value) = input.value().attr("value") {
574 let album_artist = value.trim().to_string();
575 if !album_artist.is_empty() {
576 return Some(album_artist);
577 }
578 }
579 }
580 None
581 }
582
583 fn extract_name_from_row(&self, row: &scraper::ElementRef, item_type: &str) -> Result<String> {
587 let name_selector = Selector::parse(".chartlist-name a").unwrap();
588 let name = row
589 .select(&name_selector)
590 .next()
591 .map(|el| el.text().collect::<String>().trim().to_string())
592 .ok_or_else(|| LastFmError::Parse(format!("Missing {item_type} name")))?;
593 Ok(name)
594 }
595
596 fn extract_playcount_from_row(&self, row: &scraper::ElementRef) -> u32 {
598 let playcount_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
599 let mut playcount = 1; if let Some(element) = row.select(&playcount_selector).next() {
602 let text = element.text().collect::<String>().trim().to_string();
603 if let Some(number_part) = text.split_whitespace().next() {
605 if let Ok(count) = number_part.parse::<u32>() {
606 playcount = count;
607 }
608 }
609 }
610 playcount
611 }
612
613 pub fn parse_pagination(
615 &self,
616 document: &Html,
617 _current_page: u32,
618 ) -> Result<(bool, Option<u32>)> {
619 let pagination_selector = Selector::parse(".pagination-list").unwrap();
620
621 if let Some(pagination) = document.select(&pagination_selector).next() {
622 let next_selectors = [
624 "a[aria-label=\"Next\"]",
625 ".pagination-next a",
626 "a:contains(\"Next\")",
627 ".next a",
628 ];
629
630 let mut has_next = false;
631 for selector_str in &next_selectors {
632 if let Ok(selector) = Selector::parse(selector_str) {
633 if pagination.select(&selector).next().is_some() {
634 has_next = true;
635 break;
636 }
637 }
638 }
639
640 let total_pages = self.extract_total_pages_from_pagination(&pagination);
642
643 Ok((has_next, total_pages))
644 } else {
645 Ok((false, Some(1)))
647 }
648 }
649
650 fn extract_total_pages_from_pagination(&self, pagination: &scraper::ElementRef) -> Option<u32> {
652 let text = pagination.text().collect::<String>();
654 if let Some(of_pos) = text.find(" of ") {
655 let after_of = &text[of_pos + 4..];
656 if let Some(number_end) = after_of.find(|c: char| !c.is_ascii_digit()) {
657 if let Ok(total) = after_of[..number_end].parse::<u32>() {
658 return Some(total);
659 }
660 } else if let Ok(total) = after_of.trim().parse::<u32>() {
661 return Some(total);
662 }
663 }
664 None
665 }
666
667 pub fn find_timestamp_for_track(&self, _document: &Html, _track_name: &str) -> Option<u64> {
673 None
675 }
676
677 pub fn find_playcount_for_track(&self, document: &Html, track_name: &str) -> Result<u32> {
678 let count_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
680 let link_selector = Selector::parse("a[href*=\"/music/\"]").unwrap();
681
682 for link in document.select(&link_selector) {
684 let link_text = link.text().collect::<String>().trim().to_string();
685 if link_text == track_name {
686 if let Some(row) = self.find_ancestor_row(link) {
687 if let Some(count_element) = row.select(&count_selector).next() {
688 let text = count_element.text().collect::<String>().trim().to_string();
689 if let Some(number_part) = text.split_whitespace().next() {
690 if let Ok(count) = number_part.parse::<u32>() {
691 return Ok(count);
692 }
693 }
694 }
695 }
696 }
697 }
698 Err(LastFmError::Parse(format!(
699 "Could not find playcount for track: {track_name}"
700 )))
701 }
702
703 pub fn find_timestamp_for_album(&self, _document: &Html, _album_name: &str) -> Option<u64> {
704 None
706 }
707
708 pub fn find_playcount_for_album(&self, document: &Html, album_name: &str) -> Result<u32> {
709 let count_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
711 let link_selector = Selector::parse("a[href*=\"/music/\"]").unwrap();
712
713 for link in document.select(&link_selector) {
715 let link_text = link.text().collect::<String>().trim().to_string();
716 if link_text == album_name {
717 if let Some(row) = self.find_ancestor_row(link) {
718 if let Some(count_element) = row.select(&count_selector).next() {
719 let text = count_element.text().collect::<String>().trim().to_string();
720 if let Some(number_part) = text.split_whitespace().next() {
721 if let Ok(count) = number_part.parse::<u32>() {
722 return Ok(count);
723 }
724 }
725 }
726 }
727 }
728 }
729 Err(LastFmError::Parse(format!(
730 "Could not find playcount for album: {album_name}"
731 )))
732 }
733
734 pub fn find_ancestor_row<'a>(
735 &self,
736 element: scraper::ElementRef<'a>,
737 ) -> Option<scraper::ElementRef<'a>> {
738 let mut current = element;
739 while let Some(parent) = current.parent() {
740 if let Some(parent_elem) = scraper::ElementRef::wrap(parent) {
741 if parent_elem.value().name() == "tr" {
742 return Some(parent_elem);
743 }
744 current = parent_elem;
745 } else {
746 break;
747 }
748 }
749 None
750 }
751
752 pub fn parse_artists_page(&self, document: &Html, page_number: u32) -> Result<ArtistPage> {
754 let mut artists = Vec::new();
755
756 let table_selector = Selector::parse("table.chartlist").unwrap();
758 let row_selector = Selector::parse("tr.js-link-block").unwrap();
759
760 let tables: Vec<_> = document.select(&table_selector).collect();
761 log::debug!("Found {} chartlist tables for artists", tables.len());
762
763 for table in tables {
764 for row in table.select(&row_selector) {
765 if let Ok(artist) = self.parse_artist_row(&row) {
766 artists.push(artist);
767 }
768 }
769 }
770
771 log::debug!("Parsed {} artists from page {}", artists.len(), page_number);
772
773 let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
774
775 Ok(ArtistPage {
776 artists,
777 page_number,
778 has_next_page,
779 total_pages,
780 })
781 }
782
783 fn parse_artist_row(&self, row: &scraper::ElementRef) -> Result<Artist> {
785 let name_selector = Selector::parse("td.chartlist-name a").unwrap();
787 let name = row
788 .select(&name_selector)
789 .next()
790 .ok_or(LastFmError::Parse("Missing artist name".to_string()))?
791 .text()
792 .collect::<String>()
793 .trim()
794 .to_string();
795
796 let count_selector = Selector::parse(".chartlist-count-bar").unwrap();
798 let playcount = if let Some(count_element) = row.select(&count_selector).next() {
799 let count_text = count_element.text().collect::<String>();
800 self.extract_number_from_count_text(&count_text)
801 .unwrap_or(0)
802 } else {
803 0
804 };
805
806 let timestamp = None;
808
809 Ok(Artist {
810 name,
811 playcount,
812 timestamp,
813 })
814 }
815
816 fn extract_number_from_count_text(&self, text: &str) -> Option<u32> {
818 let cleaned = text.replace(',', "");
820 cleaned.split_whitespace().next()?.parse::<u32>().ok()
821 }
822}
823
824impl Default for LastFmParser {
825 fn default() -> Self {
826 Self::new()
827 }
828}