use std::fmt::{Debug, Formatter};
use std::sync::LazyLock;
use html_escape::decode_html_entities;
use log::debug;
use regex::Regex;
use reqwest::IntoUrl;
use scraper::{Html, Selector};
use crate::backends::{BackendError, ChapterListElem, ChapterOrderingFn};
use crate::utils::get;
use crate::{Backend, Chapter};
// CSS selectors used to scrape freewebnovel.com pages. Each one is parsed lazily on first
// access; the `unwrap` panics at that point if the hard-coded selector string fails to parse.

/// Selects the fiction title heading (`h1.tit`).
pub(crate) static TITLE_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse("h1.tit").unwrap());
/// Selects `a.a1` anchors; `authors` further filters them by their `href` prefix.
pub(crate) static AUTHORS_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse("a.a1").unwrap());
/// Selects the chapter links (`a.con`) of the chapter list; used to list, count and resolve
/// chapters.
pub(crate) static CHAPTER_LIST_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse("div.m-newest2 ul#idData li a.con").unwrap());
/// Selects the chapter title element on a chapter page.
pub(crate) static CHAPTER_TITLE_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse("div.top span.chapter").unwrap());
/// Selects the element whose inner HTML is used as the chapter content.
pub(crate) static CHAPTER_CONTENT_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse("div.txt div#article").unwrap());
/// Selects the `og:image` meta tag whose `content` attribute is read as the cover URL.
pub(crate) static FICTION_COVER_IMAGE_URL_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse("meta[property='og:image']").unwrap());
/// Backend for fictions hosted on freewebnovel.com.
///
/// Pairs the fiction URL with the parsed HTML of the page fetched from it; metadata and the
/// chapter list are read from that stored page.
pub struct FreeWebNovel {
/// URL the backend was created from (an empty string for the `Default` value).
url: String,
/// Parsed HTML document of the fiction page.
page: Html,
}
/// Debug output shows only the `url` field; the parsed `page` is left out of the output.
impl Debug for FreeWebNovel {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FreeWebNovel")
            .field("url", &self.url)
            .finish()
    }
}
/// The default backend is a placeholder: an empty URL and a document created with
/// `Html::new_document()`.
impl Default for FreeWebNovel {
    fn default() -> Self {
        let page = Html::new_document();
        let url = String::new();
        Self { url, page }
    }
}
impl Backend for FreeWebNovel {
fn get_backend_regexps() -> Vec<Regex> {
vec![Regex::new(r"https?://freewebnovel\.com/[\w-]+\.html").unwrap()]
}
fn get_backend_name() -> &'static str {
"freewebnovel"
}
fn get_ordering_function() -> ChapterOrderingFn {
fn parse_chapter_id(chapter_title: &str) -> Option<u32> {
let re = Regex::new(r"Chapter (\d+)").unwrap();
re.captures(chapter_title)
.and_then(|caps| caps.get(1))
.and_then(|cap| cap.as_str().parse::<u32>().ok())
}
Box::new(|c1: &Chapter, c2: &Chapter| {
let chapter_number_1 = c1
.title()
.clone()
.and_then(|title| parse_chapter_id(title.as_str()));
let chapter_number_2 = c2
.title()
.clone()
.and_then(|title| parse_chapter_id(title.as_str()));
chapter_number_1.cmp(&chapter_number_2)
})
}
fn new(url: &str) -> Result<Self, BackendError> {
let req = get(url)?;
if !req.status().is_success() {
return Err(BackendError::RequestFailed {
message: format!("Could not fetch url {url}"),
status: req.status(),
content: req.text()?,
});
}
Ok(Self {
url: url.to_string(),
page: Html::parse_document(&req.text()?),
})
}
fn title(&self) -> Result<String, BackendError> {
title(&self.page)
}
fn immutable_identifier(&self) -> Result<String, BackendError> {
Ok(self
.url
.split('/')
.last()
.unwrap()
.to_string()
.strip_suffix(".html")
.unwrap()
.to_string())
}
fn url(&self) -> String {
self.url.clone()
}
fn cover_url(&self) -> Result<String, BackendError> {
get_cover_url(&self.page)
}
fn get_authors(&self) -> Result<Vec<String>, BackendError> {
authors(&self.page)
}
fn get_chapter_list(&self) -> Result<Vec<ChapterListElem>, BackendError> {
get_chapter_list(&self.page)
}
fn get_chapter(&self, chapter_number: usize) -> Result<Chapter, BackendError> {
if chapter_number == 0 {
return Err(BackendError::UnknownChapter(chapter_number));
}
let chapter_url = self
.page
.select(&CHAPTER_LIST_SELECTOR)
.map(|select| select.attr("href").unwrap())
.nth(chapter_number - 1)
.ok_or(BackendError::UnknownChapter(chapter_number))?;
let chapter_url = format!("https://freewebnovel.com{}", chapter_url);
let mut chapter = get_chapter(chapter_url)?;
chapter.index = chapter_number;
chapter.fiction_url = self.url.clone();
Ok(chapter)
}
fn get_chapter_count(&self) -> Result<usize, BackendError> {
chapter_count(&self.page)
}
}
/// Reads the cover image URL from the `content` attribute of the page's `og:image` meta tag.
///
/// # Errors
/// Returns `BackendError::ParseError` when the meta tag is absent or has no `content`
/// attribute.
pub(crate) fn get_cover_url(page: &Html) -> Result<String, BackendError> {
    let Some(meta) = page.select(&FICTION_COVER_IMAGE_URL_SELECTOR).next() else {
        return Err(BackendError::ParseError(
            "Could not find cover url".to_string(),
        ));
    };
    match meta.attr("content") {
        Some(cover) => Ok(cover.to_string()),
        None => Err(BackendError::ParseError(
            "Could not find cover url: missing \"content\" attribute.".to_string(),
        )),
    }
}
/// Downloads the chapter page at `url` and builds a [`Chapter`] from it.
///
/// The chapter title is the entity-decoded inner HTML of the title element, and the content
/// is the raw inner HTML of the article element. The resulting chapter carries the title,
/// the chapter URL and the content; every other field keeps its `Chapter::default()` value.
///
/// # Errors
/// - `BackendError::RequestFailed` when the response status is not a success.
/// - `BackendError::ParseError` when the page has no chapter title or no chapter content
///   element (previously a panic).
/// - Errors from URL conversion, the request, or reading the body are propagated.
pub(crate) fn get_chapter(url: impl IntoUrl) -> Result<Chapter, BackendError> {
    let url_str = url.into_url()?.to_string();
    let resp = get(&url_str)?;
    let status = resp.status();
    if !status.is_success() {
        return Err(BackendError::RequestFailed {
            message: format!("Could not get chapter at URL {url_str}"),
            status,
            content: resp.text()?,
        });
    }
    let page = Html::parse_document(&resp.text()?);

    // The page layout is outside our control: report a missing element instead of panicking.
    let title_elem = page.select(&CHAPTER_TITLE_SELECTOR).next().ok_or_else(|| {
        BackendError::ParseError(format!(
            "Could not find the chapter title at URL {url_str}"
        ))
    })?;
    let chapter_title = decode_html_entities(&title_elem.inner_html()).to_string();

    let chapter_content = page
        .select(&CHAPTER_CONTENT_SELECTOR)
        .next()
        .ok_or_else(|| {
            BackendError::ParseError(format!(
                "Could not find the chapter content at URL {url_str}"
            ))
        })?
        .inner_html();

    let mut chapter = Chapter::default();
    chapter.set_title(Some(chapter_title));
    chapter.set_chapter_url(url_str);
    chapter.set_content(chapter_content);
    Ok(chapter)
}
/// Returns the inner HTML of the first element matching the title selector.
///
/// The lookup result is logged at debug level whether or not a title was found.
///
/// # Errors
/// Returns `BackendError::ParseError` when no element matches.
pub(crate) fn title(page: &Html) -> Result<String, BackendError> {
    let found = page
        .select(&TITLE_SELECTOR)
        .next()
        .map(|heading| heading.inner_html());
    debug!("title: {:?}", found);
    found.ok_or_else(|| BackendError::ParseError("Could not get a title".to_string()))
}
/// Collects the inner HTML of every author link on the page.
///
/// An anchor counts as an author link when its `href` starts with `/author/` or
/// `/authors/`; anchors without an `href` are skipped. The result is empty when nothing
/// matches; this function never returns an error.
pub(crate) fn authors(page: &Html) -> Result<Vec<String>, BackendError> {
    let mut names = Vec::new();
    for anchor in page.select(&AUTHORS_SELECTOR) {
        let is_author_link = anchor
            .attr("href")
            .is_some_and(|href| href.starts_with("/author/") || href.starts_with("/authors/"));
        if is_author_link {
            names.push(anchor.inner_html());
        }
    }
    Ok(names)
}
pub(crate) fn get_chapter_list(page: &Html) -> Result<Vec<ChapterListElem>, BackendError> {
Ok(page
.select(&CHAPTER_LIST_SELECTOR)
.enumerate()
.map(|(index, elem)| {
(
index + 1,
decode_html_entities(elem.attr("title").unwrap()).to_string(),
)
})
.collect())
}
/// Returns the number of chapter links found on the page.
///
/// Links are counted directly: no intermediate list is built, and a link without an `href`
/// no longer causes a panic since the attribute is not needed for counting. This function
/// never returns an error.
pub(crate) fn chapter_count(page: &Html) -> Result<usize, BackendError> {
    Ok(page.select(&CHAPTER_LIST_SELECTOR).count())
}
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use test_log::test;

    use crate::backends::FreeWebNovel;
    use crate::{Backend, Chapter};

    // NOTE: these tests fetch live pages from freewebnovel.com.
    const TEST_URL: &str = "https://freewebnovel.com/the-guide-to-conquering-earthlings.html";

    /// A chapter rendered with `to_string` parses back into an equal chapter.
    #[test]
    fn test_chapter_to_string_and_back() {
        let backend = FreeWebNovel::new(TEST_URL).unwrap();
        let original = backend.get_chapter(1).unwrap();
        let serialized = original.to_string();
        let round_tripped = Chapter::from_str(&serialized).unwrap();
        assert_eq!(original, round_tripped);
    }

    /// The titles of the first two fetched chapters match the titles in the chapter list.
    #[test]
    fn test_chapter_list_equality() {
        let backend = FreeWebNovel::new(TEST_URL).unwrap();
        let mut fetched: Vec<Chapter> = Vec::new();
        for index in 1..3 {
            fetched.push(backend.get_chapter(index).unwrap());
        }
        let listing = backend.get_chapter_list().unwrap();
        for chapter in fetched {
            let listed_title = listing[chapter.index - 1].1.clone();
            assert_eq!(chapter.title(), &Some(listed_title))
        }
    }
}