use std::{borrow::Cow, ops::Deref};
use anyhow::{Context, Result};
use tl::{queryselector::QuerySelectorIterator, HTMLTag, NodeHandle, Parser, VDom};
use super::{SearchCategory, SearchItem};
/// Parses every search result entry found in the document.
///
/// Looks up the first element carrying the `chapter-list` class, selects the
/// `dd` elements inside it, and converts each one with `parse_item`.
///
/// # Errors
///
/// Fails when the `dd` query inside `chapter-list` cannot be produced, or as
/// soon as any single entry fails to parse (remaining entries are skipped).
pub fn parse_items(dom: &VDom, parser: &Parser) -> Result<Vec<SearchItem>> {
    let entries = select_query_inside_class(dom, parser, "chapter-list", "dd")
        .context("unable to parse item list element")?;

    // Collecting into `Result<Vec<_>>` stops at the first entry that errors.
    select_all_html_tag(entries, parser)
        .map(|entry| parse_item(entry, parser))
        .collect()
}
/// Converts a single result element into a [`SearchItem`].
///
/// Every `a` descendant that is *not* a member of the `name` class is treated
/// as a candidate tag link; it is kept only when its `href` resolves to a
/// `SearchCategory::Directory`. The element matched by the `.name` selector
/// supplies the item's title and permalink.
///
/// Both the title and the tag names are run through HTML entity decoding so
/// that the two kinds of text taken from the same markup are consistent.
///
/// # Errors
///
/// Fails when a non-title anchor has no `href`, when any `href` cannot be
/// parsed by `parse_full_permalink`, or when the `.name` element (or its
/// `href`) is missing.
fn parse_item(html_tag: &HTMLTag, parser: &Parser) -> Result<SearchItem> {
    let mut tags = vec![];
    if let Some(anchors) = html_tag.query_selector(parser, "a") {
        for anchor in select_all_html_tag(anchors, parser) {
            // The anchor with class `name` is the title link, handled below.
            if anchor.attributes().is_class_member("name") {
                continue;
            }
            let href =
                attribute_from_tag(anchor, "href").context("unable to find html_tag permalink")?;
            let (permalink, search_kind) = parse_full_permalink(&href)?;
            // Links that resolve to anything other than a directory are not tags.
            if let SearchCategory::Directory(kind) = search_kind {
                tags.push(crate::TagItem {
                    // Decode entities exactly like the title so `&amp;` and
                    // friends do not leak into tag names.
                    name: html_escape::decode_html_entities(anchor.inner_text(parser).deref())
                        .into_owned(),
                    kind,
                    permalink,
                });
            }
        }
    }

    let title_html_tag = select_first_html_tag(html_tag, parser, ".name")
        .context("unable to find title's anchor tag")?;
    let title = html_escape::decode_html_entities(title_html_tag.inner_text(parser).deref())
        .into_owned();
    let href = attribute_from_tag(title_html_tag, "href")
        .context("unable to find permalink from the title's anchor tag")?;
    let (permalink, kind) = parse_full_permalink(&href)?;

    Ok(SearchItem {
        title,
        permalink,
        kind,
        tags,
    })
}
/// Splits a site-relative link such as `/<directory>/<rest>` into the
/// remaining permalink (`<rest>`) and the category named by `<directory>`.
///
/// A single leading `/` is removed when present. The first path segment is
/// then parsed as a directory kind; if that fails, the literal segment
/// `chapters` maps to `SearchCategory::Chapter`.
///
/// # Errors
///
/// Fails when no `/` separates the directory from the rest of the link, or
/// when the directory segment is neither a known directory kind nor
/// `chapters`. Unlike byte-index slicing, empty input or input starting with a
/// multi-byte character yields an error instead of a panic.
fn parse_full_permalink(permalink: &str) -> Result<(String, SearchCategory)> {
    // `strip_prefix` never panics and only removes an actual leading slash,
    // instead of blindly discarding whatever the first byte happens to be.
    let without_root = permalink.strip_prefix('/').unwrap_or(permalink);
    let (directory, actual_permalink) = without_root
        .split_once('/')
        .with_context(|| format!("unable to find permalink delimiter `{}`", permalink))?;

    let kind = if let Ok(directory_kind) = directory.parse() {
        SearchCategory::Directory(directory_kind)
    } else if directory == "chapters" {
        SearchCategory::Chapter
    } else {
        return Err(anyhow::anyhow!("unable to parse permalink `{}`", permalink));
    };

    Ok((actual_permalink.to_string(), kind))
}
/// Reads the pagination widget and returns `(current_page, last_page)`.
///
/// Scans the `li` elements inside the first element with the `pagination`
/// class. Every node under an `li` whose text parses as a number updates the
/// second value, so it ends up as the last number encountered in document
/// order (not a computed maximum). When the owning `li` is a member of the
/// `active` class, the number is also recorded as the current page.
///
/// Both values default to `1` when no pagination is found or no number parses.
/// This implementation has no failing path; it always returns `Ok`.
pub fn parse_page_numbers(dom: &VDom, parser: &Parser) -> Result<(u64, u64)> {
    let mut current: u64 = 1;
    let mut last_seen: u64 = 1;

    if let Some(entries) = select_query_inside_class(dom, parser, "pagination", "li") {
        for entry in select_all_html_tag(entries, parser) {
            let is_active = entry.attributes().is_class_member("active");
            // Non-numeric nodes (arrows, ellipses, ...) are silently ignored.
            let numbers = entry
                .children()
                .all(parser)
                .iter()
                .filter_map(|node| node.inner_text(parser).parse::<u64>().ok());
            for number in numbers {
                if is_active {
                    current = number;
                }
                last_seen = number;
            }
        }
    }

    Ok((current, last_seen))
}
/// Returns the value of `attribute_key` on `html_tag` as a string.
///
/// Yields `None` both when the attribute is absent and when it is present
/// without a value.
fn attribute_from_tag<'a>(html_tag: &'a HTMLTag, attribute_key: &'a str) -> Option<Cow<'a, str>> {
    // First `?`: attribute missing. Second `?`: attribute has no value.
    let raw_value = html_tag.attributes().get(attribute_key)??;
    Some(raw_value.as_utf8_str())
}
/// Turns the node handles produced by a query selector into the HTML tags
/// they point at, dropping any handle that does not resolve to a tag.
fn select_all_html_tag<'a, 'b>(
    selectors: QuerySelectorIterator<'a, 'b, HTMLTag<'a>>,
    parser: &'a Parser,
) -> impl Iterator<Item = &'a HTMLTag<'a>> + 'b {
    selectors.filter_map(move |handle| html_tag_from_handle(handle, parser))
}
/// Returns the first match of `selector` inside `html_tag`, as a tag.
///
/// Yields `None` when the query cannot be built, when nothing matches, or
/// when the first matching handle does not resolve to a tag.
fn select_first_html_tag<'a>(
    html_tag: &'a HTMLTag,
    parser: &'a Parser,
    selector: &'a str,
) -> Option<&'a HTMLTag<'a>> {
    let first_handle = html_tag.query_selector(parser, selector)?.next()?;
    html_tag_from_handle(first_handle, parser)
}
/// Runs `selector` inside the first element of `dom` that has class `class`.
///
/// Yields `None` when no element carries the class, when that element's
/// handle does not resolve to a tag, or when the query cannot be built.
fn select_query_inside_class<'a>(
    dom: &'a VDom,
    parser: &'a Parser,
    class: &'a str,
    selector: &'a str,
) -> Option<QuerySelectorIterator<'a, 'a, HTMLTag<'a>>> {
    let container_handle = dom.get_elements_by_class_name(class).next()?;
    let container = html_tag_from_handle(container_handle, parser)?;
    container.query_selector(parser, selector)
}
/// Resolves `handle` through `parser` and returns the node as an HTML tag,
/// or `None` when the handle cannot be resolved or the node is not a tag.
fn html_tag_from_handle<'a>(handle: NodeHandle, parser: &'a Parser) -> Option<&'a HTMLTag<'a>> {
    handle.get(parser)?.as_tag()
}