dynasty-api 1.1.0

Dynasty Reader's wrappers
Documentation
use std::{borrow::Cow, ops::Deref};

use anyhow::{Context, Result};
use tl::{queryselector::QuerySelectorIterator, HTMLTag, NodeHandle, Parser, VDom};

use super::{SearchCategory, SearchItem};

/// Collects every search result found in the document.
///
/// Results are read from the `dd` elements inside the first element carrying the
/// `chapter-list` class, each one being converted with [parse_item].
///
/// # Errors
///
/// Fails when the list element cannot be located, or as soon as one entry fails to parse.
pub fn parse_items(dom: &VDom, parser: &Parser) -> Result<Vec<SearchItem>> {
    let entries = select_query_inside_class(dom, parser, "chapter-list", "dd")
        .context("unable to parse item list element")?;

    // collecting into a `Result` stops at the first entry that fails to parse
    select_all_html_tag(entries, parser)
        .map(|entry| parse_item(entry, parser))
        .collect()
}

/// Parses a single search result element into a [SearchItem].
///
/// The anchor carrying the `name` class provides the title and the permalink of the item.
/// Every other anchor is treated as a tag link and is kept only when its permalink resolves
/// to a [SearchCategory::Directory].
///
/// # Errors
///
/// Fails when an anchor has no `href` value, when a permalink cannot be parsed,
/// or when the title anchor is missing.
fn parse_item(html_tag: &HTMLTag, parser: &Parser) -> Result<SearchItem> {
    let mut tags = vec![];
    if let Some(selectors) = html_tag
        .query_selector(parser, "a")
        .map(|selectors| select_all_html_tag(selectors, parser))
    {
        for child_html_tag in selectors {
            if child_html_tag.attributes().is_class_member("name") {
                // skip the title
                continue;
            }

            let href = attribute_from_tag(child_html_tag, "href")
                .context("unable to find html_tag permalink")?;

            let (permalink, search_kind) = parse_full_permalink(&href)?;
            if let SearchCategory::Directory(kind) = search_kind {
                // inner text is raw markup text, so entities such as `&amp;` have to be
                // decoded here exactly like it is done for the title below
                let name = html_escape::decode_html_entities(
                    child_html_tag.inner_text(parser).deref(),
                )
                .to_string();

                tags.push(crate::TagItem {
                    name,
                    kind,
                    permalink,
                })
            }
        }
    }

    let title_html_tag = select_first_html_tag(html_tag, parser, ".name")
        .context("unable to find title's anchor tag")?;

    let title =
        html_escape::decode_html_entities(title_html_tag.inner_text(parser).deref()).to_string();
    let href = attribute_from_tag(title_html_tag, "href")
        .context("unable to find permalink from the title's anchor tag")?;
    let (permalink, kind) = parse_full_permalink(&href)?;

    Ok(SearchItem {
        title,
        permalink,
        kind,
        tags,
    })
}

/// Parses permalink like `/chapters/title`, `/tags/tt`, etc
///
/// The first character (expected to be the leading `/`) is skipped, then the remainder is
/// split at the next `/` into a directory name and the actual permalink.
///
/// # Errors
///
/// Fails when the permalink is too short or has no `/` delimiter after the first character,
/// or when the directory is neither a known directory kind nor `chapters`.
fn parse_full_permalink(permalink: &str) -> Result<(String, SearchCategory)> {
    // `get(1..)` rather than `[1..]`: slicing would panic on an empty string or when
    // byte 1 is not a char boundary, and the input comes straight from scraped markup
    let (directory, actual_permalink) = permalink
        .get(1..)
        .and_then(|rest| rest.split_once('/'))
        .with_context(|| format!("unable to find permalink delimiter `{}`", permalink))?;

    let kind = if let Ok(directory_kind) = directory.parse() {
        SearchCategory::Directory(directory_kind)
    } else if directory == "chapters" {
        SearchCategory::Chapter
    } else {
        anyhow::bail!("unable to parse permalink `{}`", permalink);
    };

    Ok((actual_permalink.to_string(), kind))
}

/// Reads `(current page, last page)` from the pagination element.
///
/// Both values stay at `1` when there is no pagination element or when none of its
/// entries contains a number.
pub fn parse_page_numbers(dom: &VDom, parser: &Parser) -> Result<(u64, u64)> {
    let mut current = 1;
    let mut last = 1;

    // every `li` inside the first `pagination` element, if there is one
    let entries = select_query_inside_class(dom, parser, "pagination", "li")
        .map(|handles| select_all_html_tag(handles, parser));

    for entry in entries.into_iter().flatten() {
        // the entry for the page being displayed carries the `active` class
        let is_active = entry.attributes().is_class_member("active");

        let numbers = entry
            .children()
            .all(parser)
            .iter()
            .filter_map(|node| node.inner_text(parser).parse::<u64>().ok());

        for number in numbers {
            if is_active {
                current = number;
            }

            // keep overwriting: the last numeric text encountered is the last page number
            last = number;
        }
    }

    Ok((current, last))
}

/// Looks up an attribute on the tag and returns its value as [str]
///
/// Yields `None` when the attribute is absent or carries no value.
fn attribute_from_tag<'a>(html_tag: &'a HTMLTag, attribute_key: &'a str) -> Option<Cow<'a, str>> {
    // first `?`: the attribute exists, second `?`: it actually has a value
    let raw_value = html_tag.attributes().get(attribute_key)??;
    Some(raw_value.as_utf8_str())
}

/// Resolves every [NodeHandle] yielded by the selectors into its [HTMLTag]
///
/// Handles that do not resolve to a tag are dropped.
fn select_all_html_tag<'a, 'b>(
    selectors: QuerySelectorIterator<'a, 'b, HTMLTag<'a>>,
    parser: &'a Parser,
) -> impl Iterator<Item = &'a HTMLTag<'a>> + 'b {
    selectors.filter_map(move |handle| html_tag_from_handle(handle, parser))
}

/// Finds the first element matching the selector and returns it as [HTMLTag]
///
/// Yields `None` when the query cannot be built, nothing matches,
/// or the first match is not a tag.
fn select_first_html_tag<'a>(
    html_tag: &'a HTMLTag,
    parser: &'a Parser,
    selector: &'a str,
) -> Option<&'a HTMLTag<'a>> {
    let first_match = html_tag.query_selector(parser, selector)?.next()?;
    html_tag_from_handle(first_match, parser)
}

/// Runs a query selector scoped to the first element having the given class
///
/// Yields `None` when no element has the class, when that element is not a tag,
/// or when the query cannot be built.
fn select_query_inside_class<'a>(
    dom: &'a VDom,
    parser: &'a Parser,
    class: &'a str,
    selector: &'a str,
) -> Option<QuerySelectorIterator<'a, 'a, HTMLTag<'a>>> {
    let first_handle = dom.get_elements_by_class_name(class).next()?;
    let container = html_tag_from_handle(first_handle, parser)?;
    container.query_selector(parser, selector)
}

/// Resolves a [NodeHandle] through the parser and returns it as [HTMLTag]
///
/// Yields `None` when the handle does not resolve or the node is not a tag.
fn html_tag_from_handle<'a>(handle: NodeHandle, parser: &'a Parser) -> Option<&'a HTMLTag<'a>> {
    handle.get(parser)?.as_tag()
}