use crate::{Album, AlbumPage, Artist, ArtistPage, LastFmError, Result, Track, TrackPage};
use scraper::{Html, Selector};
/// Parser for HTML documents that have already been loaded into a
/// [`scraper::Html`] tree.
///
/// The type is a unit struct: it carries no fields, so every method works
/// purely from the document or row it is handed.
#[derive(Debug, Clone)]
pub struct LastFmParser;
impl LastFmParser {
pub fn new() -> Self {
Self
}
/// Collects one [`Track`] per parseable body row of every `table.chartlist`
/// in `document`.
///
/// Rows that fail to parse are skipped silently; the function itself never
/// returns `Err`, and an empty result is only logged.
pub fn parse_recent_scrobbles(&self, document: &Html) -> Result<Vec<Track>> {
    let table_selector = Selector::parse("table.chartlist").unwrap();
    let row_selector = Selector::parse("tbody tr").unwrap();

    let chart_tables: Vec<_> = document.select(&table_selector).collect();
    log::debug!("Found {} chartlist tables", chart_tables.len());

    let scrobbles: Vec<Track> = chart_tables
        .iter()
        .flat_map(|table| table.select(&row_selector))
        .filter_map(|row| self.parse_recent_scrobble_row(&row).ok())
        .collect();

    if scrobbles.is_empty() {
        log::debug!("No tracks found in recent scrobbles");
    }
    log::debug!("Parsed {} recent scrobbles", scrobbles.len());
    Ok(scrobbles)
}
/// Builds a [`Track`] from a single recent-scrobbles table row.
///
/// The track name and artist are the trimmed text of the first
/// `.chartlist-name a` and `.chartlist-artist a` links.
///
/// # Errors
/// Returns `LastFmError::Parse` when either link is missing. Timestamp,
/// album and album artist are optional and never cause a failure.
fn parse_recent_scrobble_row(&self, row: &scraper::ElementRef) -> Result<Track> {
    // Shared lookup: trimmed text of the first element matching `css`.
    let first_link_text = |css: &str, missing: &str| -> Result<String> {
        let selector = Selector::parse(css).unwrap();
        let link = row
            .select(&selector)
            .next()
            .ok_or_else(|| LastFmError::Parse(missing.to_string()))?;
        Ok(link.text().collect::<String>().trim().to_string())
    };

    let name = first_link_text(".chartlist-name a", "Missing track name")?;
    let artist = first_link_text(".chartlist-artist a", "Missing artist name")?;

    Ok(Track {
        name,
        artist,
        // Always 1 here (presumably because each row is one individual scrobble).
        playcount: 1,
        timestamp: self.extract_scrobble_timestamp(row),
        album: self.extract_scrobble_album(row),
        album_artist: self.extract_scrobble_album_artist(row),
    })
}
fn extract_scrobble_timestamp(&self, row: &scraper::ElementRef) -> Option<u64> {
if let Some(timestamp_str) = row.value().attr("data-timestamp") {
if let Ok(timestamp) = timestamp_str.parse::<u64>() {
return Some(timestamp);
}
}
let timestamp_input_selector = Selector::parse("input[name='timestamp']").unwrap();
if let Some(input) = row.select(×tamp_input_selector).next() {
if let Some(value) = input.value().attr("value") {
if let Ok(timestamp) = value.parse::<u64>() {
return Some(timestamp);
}
}
}
let edit_form_selector =
Selector::parse("form[data-edit-scrobble] input[name='timestamp']").unwrap();
if let Some(timestamp_input) = row.select(&edit_form_selector).next() {
if let Some(value) = timestamp_input.value().attr("value") {
if let Ok(timestamp) = value.parse::<u64>() {
return Some(timestamp);
}
}
}
None
}
/// Reads the album name from the row's edit-scrobble form.
///
/// Only the first `form[data-edit-scrobble] input[name='album_name']` is
/// consulted; a missing input, missing `value` or empty value gives `None`.
fn extract_scrobble_album(&self, row: &scraper::ElementRef) -> Option<String> {
    let selector =
        Selector::parse("form[data-edit-scrobble] input[name='album_name']").unwrap();
    let value = row.select(&selector).next()?.value().attr("value")?;
    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}
/// Reads the album artist from the row's edit-scrobble form.
///
/// Only the first `form[data-edit-scrobble] input[name='album_artist_name']`
/// is consulted; a missing input, missing `value` or empty value gives `None`.
fn extract_scrobble_album_artist(&self, row: &scraper::ElementRef) -> Option<String> {
    let selector =
        Selector::parse("form[data-edit-scrobble] input[name='album_artist_name']").unwrap();
    let value = row.select(&selector).next()?.value().attr("value")?;
    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}
/// Parses one page of tracks together with its pagination state.
///
/// `artist` and `album` are stamped onto every returned track;
/// `page_number` is echoed back in the resulting [`TrackPage`].
///
/// # Errors
/// Propagates any error from track extraction or pagination parsing.
pub fn parse_tracks_page(
    &self,
    document: &Html,
    page_number: u32,
    artist: &str,
    album: Option<&str>,
) -> Result<TrackPage> {
    let tracks = self.extract_tracks_from_document(document, artist, album)?;
    self.parse_pagination(document, page_number)
        .map(|(has_next_page, total_pages)| TrackPage {
            tracks,
            page_number,
            has_next_page,
            total_pages,
        })
}
/// Extracts a de-duplicated list of tracks from `document`.
///
/// Two passes are made:
/// 1. every element carrying a non-empty `data-track-name` attribute is
///    taken as a track, provided a playcount can be found for that name;
/// 2. every `table.chartlist` body row is parsed as a track.
///
/// A track name is emitted at most once. A name seen in pass 1 is marked as
/// seen even when its playcount lookup fails, so pass 2 will not add it.
/// `artist` (and `album`, when given) are written onto every track.
pub fn extract_tracks_from_document(
    &self,
    document: &Html,
    artist: &str,
    album: Option<&str>,
) -> Result<Vec<Track>> {
    let mut tracks = Vec::new();
    let mut seen_names = std::collections::HashSet::new();
    log::debug!("Starting track extraction for artist: {artist}, album: {album:?}");

    // Pass 1: elements annotated with data-track-name.
    let annotated_selector = Selector::parse("[data-track-name]").unwrap();
    let annotated: Vec<_> = document.select(&annotated_selector).collect();
    log::debug!("Found {} elements with data-track-name", annotated.len());

    for element in annotated {
        let track_name = match element.value().attr("data-track-name") {
            Some(value) if !value.is_empty() => value,
            _ => continue,
        };
        // `insert` returns false when the name was already recorded.
        if !seen_names.insert(track_name.to_string()) {
            continue;
        }
        match self.find_playcount_for_track(document, track_name) {
            Ok(playcount) => {
                let timestamp = self.find_timestamp_for_track(document, track_name);
                tracks.push(Track {
                    name: track_name.to_string(),
                    artist: artist.to_string(),
                    playcount,
                    timestamp,
                    album: album.map(str::to_string),
                    album_artist: None,
                });
                log::debug!("Added track '{track_name}' with {playcount} plays");
            }
            Err(e) => {
                log::debug!("FAILED to find playcount for track '{track_name}': {e}");
            }
        }
    }

    // Pass 2: plain chart rows, for names not already seen.
    let table_selector = Selector::parse("table.chartlist").unwrap();
    let row_selector = Selector::parse("tbody tr").unwrap();
    for table in document.select(&table_selector) {
        for row in table.select(&row_selector) {
            let mut track = match self.parse_track_row(&row) {
                Ok(parsed) => parsed,
                Err(_) => continue,
            };
            track.artist = artist.to_string();
            if let Some(album_name) = album {
                track.album = Some(album_name.to_string());
            }
            if seen_names.insert(track.name.clone()) {
                tracks.push(track);
            }
        }
    }

    log::debug!("Successfully extracted {} unique tracks", tracks.len());
    Ok(tracks)
}
/// Parses a chart row into a [`Track`] with only name and playcount filled.
///
/// The artist is left as an empty string and the optional fields as `None`;
/// callers are expected to fill them in.
///
/// # Errors
/// Returns `LastFmError::Parse` when the row has no name link.
pub fn parse_track_row(&self, row: &scraper::ElementRef) -> Result<Track> {
    Ok(Track {
        name: self.extract_name_from_row(row, "track")?,
        artist: String::new(),
        playcount: self.extract_playcount_from_row(row),
        timestamp: None,
        album: None,
        album_artist: None,
    })
}
/// Parses one page of albums together with its pagination state.
///
/// When the document contains elements with a `data-album-name` attribute,
/// each distinct non-empty name becomes an album if a playcount can be found
/// for it. Otherwise the chart table rows are parsed instead.
///
/// # Errors
/// Propagates errors from the row-based fallback and from pagination parsing.
pub fn parse_albums_page(
    &self,
    document: &Html,
    page_number: u32,
    artist: &str,
) -> Result<AlbumPage> {
    let annotated_selector = Selector::parse("[data-album-name]").unwrap();
    let annotated: Vec<_> = document.select(&annotated_selector).collect();

    let albums = if annotated.is_empty() {
        self.parse_albums_from_rows(document, artist)?
    } else {
        log::debug!(
            "Found {} album elements with data-album-name",
            annotated.len()
        );
        let mut seen_names = std::collections::HashSet::new();
        let mut collected = Vec::new();
        for element in annotated {
            let album_name = element.value().attr("data-album-name").unwrap_or("");
            // Skip blanks and names already handled (`insert` is false for repeats).
            if album_name.is_empty() || !seen_names.insert(album_name.to_string()) {
                continue;
            }
            if let Ok(playcount) = self.find_playcount_for_album(document, album_name) {
                let timestamp = self.find_timestamp_for_album(document, album_name);
                collected.push(Album {
                    name: album_name.to_string(),
                    artist: artist.to_string(),
                    playcount,
                    timestamp,
                });
            }
        }
        collected
    };

    let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
    Ok(AlbumPage {
        albums,
        page_number,
        has_next_page,
        total_pages,
    })
}
/// Parses every `table.chartlist` body row as an album credited to `artist`.
///
/// Rows that fail to parse are skipped; the function itself never returns `Err`.
fn parse_albums_from_rows(&self, document: &Html, artist: &str) -> Result<Vec<Album>> {
    let table_selector = Selector::parse("table.chartlist").unwrap();
    let row_selector = Selector::parse("tbody tr").unwrap();

    let albums: Vec<Album> = document
        .select(&table_selector)
        .flat_map(|table| table.select(&row_selector))
        .filter_map(|row| self.parse_album_row(&row).ok())
        .map(|mut album| {
            album.artist = artist.to_string();
            album
        })
        .collect();
    Ok(albums)
}
/// Parses a chart row into an [`Album`] with only name and playcount filled.
///
/// The artist is left as an empty string and the timestamp as `None`.
///
/// # Errors
/// Returns `LastFmError::Parse` when the row has no name link.
pub fn parse_album_row(&self, row: &scraper::ElementRef) -> Result<Album> {
    Ok(Album {
        name: self.extract_name_from_row(row, "album")?,
        artist: String::new(),
        playcount: self.extract_playcount_from_row(row),
        timestamp: None,
    })
}
/// Parses track search results from every `table.chartlist` body row.
///
/// Rows that fail to parse are skipped; the function itself never returns `Err`.
pub fn parse_track_search_results(&self, document: &Html) -> Result<Vec<Track>> {
    let table_selector = Selector::parse("table.chartlist").unwrap();
    let row_selector = Selector::parse("tbody tr").unwrap();

    let chart_tables: Vec<_> = document.select(&table_selector).collect();
    log::debug!("Found {} chartlist tables in search results", chart_tables.len());

    let tracks: Vec<Track> = chart_tables
        .iter()
        .flat_map(|table| table.select(&row_selector))
        .filter_map(|row| self.parse_search_track_row(&row).ok())
        .collect();

    log::debug!("Parsed {} tracks from search results", tracks.len());
    Ok(tracks)
}
/// Parses album search results from every `table.chartlist` body row.
///
/// Rows that fail to parse are skipped; the function itself never returns `Err`.
pub fn parse_album_search_results(&self, document: &Html) -> Result<Vec<Album>> {
    let table_selector = Selector::parse("table.chartlist").unwrap();
    let row_selector = Selector::parse("tbody tr").unwrap();

    let chart_tables: Vec<_> = document.select(&table_selector).collect();
    log::debug!(
        "Found {} chartlist tables in album search results",
        chart_tables.len()
    );

    let albums: Vec<Album> = chart_tables
        .iter()
        .flat_map(|table| table.select(&row_selector))
        .filter_map(|row| self.parse_search_album_row(&row).ok())
        .collect();

    log::debug!("Parsed {} albums from search results", albums.len());
    Ok(albums)
}
/// Parses artist search results from every `table.chartlist` body row.
///
/// Rows that fail to parse are skipped; the function itself never returns `Err`.
pub fn parse_artist_search_results(&self, document: &Html) -> Result<Vec<Artist>> {
    let table_selector = Selector::parse("table.chartlist").unwrap();
    let row_selector = Selector::parse("tbody tr").unwrap();

    let chart_tables: Vec<_> = document.select(&table_selector).collect();
    log::debug!(
        "Found {} chartlist tables in artist search results",
        chart_tables.len()
    );

    let artists: Vec<Artist> = chart_tables
        .iter()
        .flat_map(|table| table.select(&row_selector))
        .filter_map(|row| self.parse_search_artist_row(&row).ok())
        .collect();

    log::debug!("Parsed {} artists from search results", artists.len());
    Ok(artists)
}
/// Builds an [`Artist`] from one search-result row.
///
/// The name is the trimmed text of the first `td.chartlist-name a` link;
/// the timestamp is always `None`.
///
/// # Errors
/// Returns `LastFmError::Parse` when the name link is missing.
fn parse_search_artist_row(&self, row: &scraper::ElementRef) -> Result<Artist> {
    let name_selector = Selector::parse("td.chartlist-name a").unwrap();
    let name_link = row.select(&name_selector).next();
    let name = match name_link {
        Some(link) => link.text().collect::<String>().trim().to_string(),
        None => {
            return Err(LastFmError::Parse(
                "Missing artist name in search results".to_string(),
            ))
        }
    };
    Ok(Artist {
        name,
        playcount: self.extract_playcount_from_row(row),
        timestamp: None,
    })
}
/// Builds a [`Track`] from one search-result row.
///
/// Name and artist come from the row's links, the playcount from its count
/// cell, and album / album artist from hidden inputs when present. The
/// timestamp is always `None`.
///
/// # Errors
/// Returns `LastFmError::Parse` when the name or artist link is missing.
fn parse_search_track_row(&self, row: &scraper::ElementRef) -> Result<Track> {
    let name = self.extract_name_from_row(row, "track")?;

    let artist_selector = Selector::parse(".chartlist-artist a").unwrap();
    let artist_link = row.select(&artist_selector).next();
    let artist = match artist_link {
        Some(link) => link.text().collect::<String>().trim().to_string(),
        None => {
            return Err(LastFmError::Parse(
                "Missing artist name in search results".to_string(),
            ))
        }
    };

    Ok(Track {
        name,
        artist,
        playcount: self.extract_playcount_from_row(row),
        timestamp: None,
        album: self.extract_album_from_search_row(row),
        album_artist: self.extract_album_artist_from_search_row(row),
    })
}
/// Builds an [`Album`] from one search-result row.
///
/// The timestamp is always `None`.
///
/// # Errors
/// Returns `LastFmError::Parse` when the name or artist link is missing.
fn parse_search_album_row(&self, row: &scraper::ElementRef) -> Result<Album> {
    let name = self.extract_name_from_row(row, "album")?;

    let artist_selector = Selector::parse(".chartlist-artist a").unwrap();
    let artist_link = row.select(&artist_selector).next();
    let artist = match artist_link {
        Some(link) => link.text().collect::<String>().trim().to_string(),
        None => {
            return Err(LastFmError::Parse(
                "Missing artist name in album search results".to_string(),
            ))
        }
    };

    Ok(Album {
        name,
        artist,
        playcount: self.extract_playcount_from_row(row),
        timestamp: None,
    })
}
/// Reads the album name from the row's first `input[name='album']`.
///
/// The value is trimmed; a missing input, missing `value` or blank value
/// gives `None`.
fn extract_album_from_search_row(&self, row: &scraper::ElementRef) -> Option<String> {
    let selector = Selector::parse("input[name='album']").unwrap();
    let raw = row.select(&selector).next()?.value().attr("value")?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Reads the album artist from the row's first `input[name='album_artist']`.
///
/// The value is trimmed; a missing input, missing `value` or blank value
/// gives `None`.
fn extract_album_artist_from_search_row(&self, row: &scraper::ElementRef) -> Option<String> {
    let selector = Selector::parse("input[name='album_artist']").unwrap();
    let raw = row.select(&selector).next()?.value().attr("value")?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Returns the trimmed text of the row's first `.chartlist-name a` link.
///
/// `item_type` is only used to word the error message.
///
/// # Errors
/// Returns `LastFmError::Parse("Missing <item_type> name")` when no such
/// link exists in the row.
fn extract_name_from_row(&self, row: &scraper::ElementRef, item_type: &str) -> Result<String> {
    let name_selector = Selector::parse(".chartlist-name a").unwrap();
    let name_link = row.select(&name_selector).next();
    match name_link {
        Some(link) => Ok(link.text().collect::<String>().trim().to_string()),
        None => Err(LastFmError::Parse(format!("Missing {item_type} name"))),
    }
}
/// Reads the playcount from the row's first `.chartlist-count-bar-value`.
///
/// The leading whitespace-delimited token of the cell text is parsed as a
/// number. Thousands separators are stripped first, so `"1,234 scrobbles"`
/// yields 1234 rather than silently falling back to the default.
///
/// Returns 1 when the cell is missing or its text is not numeric.
fn extract_playcount_from_row(&self, row: &scraper::ElementRef) -> u32 {
    let playcount_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
    let parsed = row.select(&playcount_selector).next().and_then(|cell| {
        let cell_text: String = cell.text().collect();
        // Shared helper removes commas before parsing, matching how artist
        // rows are already handled.
        self.extract_number_from_count_text(&cell_text)
    });
    parsed.unwrap_or(1)
}
/// Determines whether a next page exists and how many pages there are.
///
/// The first `.pagination-list` element is used, falling back to the first
/// `.pagination` element. When neither exists the document is treated as a
/// single page and `(false, Some(1))` is returned.
///
/// A next page is reported when the pagination container holds a link
/// matching one of the known "next" selectors, or any link whose text
/// contains `Next`. The text check replaces the former `a:contains("Next")`
/// selector, which scraper cannot parse and which therefore never matched.
///
/// `_current_page` is accepted for interface compatibility and is not used.
/// The function currently never returns `Err`.
pub fn parse_pagination(
    &self,
    document: &Html,
    _current_page: u32,
) -> Result<(bool, Option<u32>)> {
    const CONTAINER_SELECTORS: [&str; 2] = [".pagination-list", ".pagination"];
    const NEXT_LINK_SELECTORS: [&str; 5] = [
        "a[aria-label=\"Next\"]",
        "a[aria-label=\"Next page\"]",
        "a[rel=\"next\"]",
        ".pagination-next a",
        ".next a",
    ];

    let container = CONTAINER_SELECTORS.iter().find_map(|css| {
        let selector = Selector::parse(css).unwrap();
        let first_match = document.select(&selector).next();
        first_match
    });
    let pagination = match container {
        Some(element) => element,
        None => return Ok((false, Some(1))),
    };

    let matches_next_selector = NEXT_LINK_SELECTORS
        .iter()
        .filter_map(|css| Selector::parse(css).ok())
        .any(|selector| {
            let found = pagination.select(&selector).next().is_some();
            found
        });

    // Text-based fallback for markup that labels the link only by its text.
    let has_next = matches_next_selector || {
        let any_link = Selector::parse("a").unwrap();
        let labelled_next = pagination
            .select(&any_link)
            .any(|link| link.text().collect::<String>().contains("Next"));
        labelled_next
    };

    let total_pages = self.extract_total_pages_from_pagination(&pagination);
    Ok((has_next, total_pages))
}
/// Estimates the total page count from a pagination element.
///
/// Two strategies are tried in order:
/// 1. the digits immediately following the first `" of "` in the element's
///    combined text;
/// 2. the largest page number found among links whose `href` contains
///    `page=`, taking both the `page=` value and any purely numeric link
///    text into account.
///
/// Returns `None` when neither strategy finds a number.
fn extract_total_pages_from_pagination(&self, pagination: &scraper::ElementRef) -> Option<u32> {
    /// Parses the run of ASCII digits that directly follows the first `page=`.
    fn page_from_href(href: &str) -> Option<u32> {
        let (_, after) = href.split_once("page=")?;
        let digit_len = after
            .find(|ch: char| !ch.is_ascii_digit())
            .unwrap_or(after.len());
        let digits = &after[..digit_len];
        if digits.is_empty() {
            None
        } else {
            digits.parse::<u32>().ok()
        }
    }

    let combined_text: String = pagination.text().collect();
    if let Some(marker) = combined_text.find(" of ") {
        // " of " is four ASCII bytes, so this offset is a valid boundary.
        let tail = &combined_text[marker + 4..];
        let parsed = match tail.find(|ch: char| !ch.is_ascii_digit()) {
            Some(end) => tail[..end].parse::<u32>(),
            None => tail.trim().parse::<u32>(),
        };
        if let Ok(total) = parsed {
            return Some(total);
        }
    }

    let link_selector = Selector::parse("a[href*=\"page=\"]").unwrap();
    let highest = pagination
        .select(&link_selector)
        .flat_map(|anchor| {
            let from_href = anchor.value().attr("href").and_then(page_from_href);

            let label_text: String = anchor.text().collect();
            let label = label_text.trim();
            let is_numeric = !label.is_empty() && label.chars().all(|ch| ch.is_ascii_digit());
            let from_label = if is_numeric {
                label.parse::<u32>().ok()
            } else {
                None
            };

            from_href.into_iter().chain(from_label)
        })
        .max();
    highest
}
/// Timestamp lookup for a track by name.
///
/// Currently a stub: both arguments are ignored and `None` is always returned.
pub fn find_timestamp_for_track(&self, _document: &Html, _track_name: &str) -> Option<u64> {
None
}
/// Finds the playcount shown next to the track called `track_name`.
///
/// Every link whose `href` contains `/music/` is inspected. When its
/// trimmed text equals `track_name`, the enclosing `<tr>` is searched for a
/// `.chartlist-count-bar-value` cell, and the leading number of that cell is
/// returned. Thousands separators are stripped, so `"1,234 scrobbles"`
/// parses as 1234 instead of being rejected.
///
/// # Errors
/// Returns `LastFmError::Parse` when no matching link leads to a numeric
/// count cell.
pub fn find_playcount_for_track(&self, document: &Html, track_name: &str) -> Result<u32> {
    let count_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
    let link_selector = Selector::parse("a[href*=\"/music/\"]").unwrap();

    for link in document.select(&link_selector) {
        let link_text: String = link.text().collect();
        if link_text.trim() != track_name {
            continue;
        }
        let count = self
            .find_ancestor_row(link)
            .and_then(|row| {
                let cell = row.select(&count_selector).next();
                cell
            })
            .and_then(|cell| {
                let cell_text: String = cell.text().collect();
                self.extract_number_from_count_text(&cell_text)
            });
        // Keep scanning when this link has no usable count; a later link
        // with the same text may sit in a proper chart row.
        if let Some(count) = count {
            return Ok(count);
        }
    }

    Err(LastFmError::Parse(format!(
        "Could not find playcount for track: {track_name}"
    )))
}
/// Timestamp lookup for an album by name.
///
/// Currently a stub: both arguments are ignored and `None` is always returned.
pub fn find_timestamp_for_album(&self, _document: &Html, _album_name: &str) -> Option<u64> {
None
}
/// Finds the playcount shown next to the album called `album_name`.
///
/// Every link whose `href` contains `/music/` is inspected. When its
/// trimmed text equals `album_name`, the enclosing `<tr>` is searched for a
/// `.chartlist-count-bar-value` cell, and the leading number of that cell is
/// returned. Thousands separators are stripped, so `"1,234 scrobbles"`
/// parses as 1234 instead of being rejected.
///
/// # Errors
/// Returns `LastFmError::Parse` when no matching link leads to a numeric
/// count cell.
pub fn find_playcount_for_album(&self, document: &Html, album_name: &str) -> Result<u32> {
    let count_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
    let link_selector = Selector::parse("a[href*=\"/music/\"]").unwrap();

    for link in document.select(&link_selector) {
        let link_text: String = link.text().collect();
        if link_text.trim() != album_name {
            continue;
        }
        let count = self
            .find_ancestor_row(link)
            .and_then(|row| {
                let cell = row.select(&count_selector).next();
                cell
            })
            .and_then(|cell| {
                let cell_text: String = cell.text().collect();
                self.extract_number_from_count_text(&cell_text)
            });
        // Keep scanning when this link has no usable count; a later link
        // with the same text may sit in a proper chart row.
        if let Some(count) = count {
            return Ok(count);
        }
    }

    Err(LastFmError::Parse(format!(
        "Could not find playcount for album: {album_name}"
    )))
}
/// Returns the nearest `<tr>` ancestor of `element`, if any.
///
/// The walk goes upward from the element's parent and stops at the first
/// ancestor that is not an element node, so `element` itself is never
/// returned.
pub fn find_ancestor_row<'a>(
    &self,
    element: scraper::ElementRef<'a>,
) -> Option<scraper::ElementRef<'a>> {
    element
        .ancestors()
        // Stop as soon as an ancestor cannot be viewed as an element.
        .map_while(scraper::ElementRef::wrap)
        .find(|ancestor| ancestor.value().name() == "tr")
}
pub fn parse_artists_page(&self, document: &Html, page_number: u32) -> Result<ArtistPage> {
let mut artists = Vec::new();
let table_selector = Selector::parse("table.chartlist").unwrap();
let row_selector = Selector::parse("tr.js-link-block").unwrap();
let tables: Vec<_> = document.select(&table_selector).collect();
log::debug!("Found {} chartlist tables for artists", tables.len());
for table in tables {
for row in table.select(&row_selector) {
if let Ok(artist) = self.parse_artist_row(&row) {
artists.push(artist);
}
}
}
log::debug!("Parsed {} artists from page {}", artists.len(), page_number);
let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
Ok(ArtistPage {
artists,
page_number,
has_next_page,
total_pages,
})
}
/// Builds an [`Artist`] from one chart row.
///
/// The name is the trimmed text of the first `td.chartlist-name a` link.
/// The playcount is the leading number of the first `.chartlist-count-bar`
/// element, or 0 when that element is missing or not numeric. The timestamp
/// is always `None`.
///
/// # Errors
/// Returns `LastFmError::Parse` when the name link is missing.
fn parse_artist_row(&self, row: &scraper::ElementRef) -> Result<Artist> {
    let name_selector = Selector::parse("td.chartlist-name a").unwrap();
    let name_link = row.select(&name_selector).next();
    let name = match name_link {
        Some(link) => link.text().collect::<String>().trim().to_string(),
        None => return Err(LastFmError::Parse("Missing artist name".to_string())),
    };

    let count_selector = Selector::parse(".chartlist-count-bar").unwrap();
    let playcount = row
        .select(&count_selector)
        .next()
        .and_then(|cell| {
            let cell_text: String = cell.text().collect();
            self.extract_number_from_count_text(&cell_text)
        })
        .unwrap_or(0);

    Ok(Artist {
        name,
        playcount,
        timestamp: None,
    })
}
/// Parses the leading number from count text such as `"1,234 scrobbles"`.
///
/// All commas are removed, then the first whitespace-delimited token is
/// parsed as `u32`. Returns `None` for blank text or a non-numeric token.
fn extract_number_from_count_text(&self, text: &str) -> Option<u32> {
    let without_commas: String = text.chars().filter(|&ch| ch != ',').collect();
    let first_token = without_commas.split_whitespace().next()?;
    first_token.parse::<u32>().ok()
}
}
impl Default for LastFmParser {
fn default() -> Self {
Self::new()
}
}