use std::fmt::Display;
use std::io::{self};
use url::Url;
use crate::scraper::FALLBACK_TLD;
use crate::scraper::types::ScraperOptions;
/// Extracts the Google Books volume id from a book URL.
///
/// Supports both the old-style URL (id carried in the `id` query
/// parameter) and the new-style URL (id as the final path segment).
/// Returns an `io::Error` when the URL cannot be parsed or has no
/// usable path.
pub(crate) fn id_from_url(url: &str) -> io::Result<String> {
    const INVALID_URL: &str = "Invalid URL";
    let parsed = Url::try_from(url).to_result()?;
    // Old-style URLs: ...books?id=<ID>&...
    if let Some(pair) = parsed.query_pairs().find(|x| x.0 == "id") {
        return Ok(pair.1.to_string());
    }
    // New-style URLs: .../books/edition/_/<ID>
    let last_segment = parsed
        .path_segments()
        .to_result(INVALID_URL)?
        .next_back()
        .to_result(INVALID_URL)?;
    Ok(last_segment.to_string())
}
/// Builds a canonical Google Books URL for the given volume id.
///
/// The top-level domain comes from `options` when provided, otherwise
/// `FALLBACK_TLD` is used.
pub(crate) fn url_from_id(id: &str, options: Option<&ScraperOptions>) -> String {
    let tld = match options {
        Some(opts) => opts.tld.as_str(),
        None => FALLBACK_TLD,
    };
    format!("https://books.google{tld}/books?id={id}&hl=en")
}
/// Builds the `jscmd=click3` JSON endpoint URL for a volume, which
/// returns page metadata starting from `first_page` / `page_id`.
pub(crate) fn get_json_url(id: &str, first_page: &str, page_id: &str, options: Option<&ScraperOptions>) -> String {
    let base = url_from_id(id, options);
    format!("{base}&lpg={first_page}&pg={page_id}&jscmd=click3")
}
/// Rebuilds a possibly messy book URL into the canonical form, keeping
/// only the volume id and — when present — the `atm_aiy` query
/// parameter from the original URL.
pub(crate) fn sanitize_url(url: &str, options: Option<&ScraperOptions>) -> io::Result<String> {
    const PERIOD_TAG: &str = "atm_aiy";
    let base_url = url_from_id(&id_from_url(url)?, options);
    let parsed = Url::try_from(url).to_result()?;
    let period = parsed.query_pairs().find(|x| x.0 == PERIOD_TAG);
    Ok(match period {
        Some((_, value)) => format!("{base_url}&{PERIOD_TAG}={value}"),
        None => base_url,
    })
}
/// Converts any `Result` whose error implements `Display` into an
/// `io::Result`, preserving the error's message text.
pub(crate) trait ToResult<T> {
    fn to_result(self) -> std::io::Result<T>;
}

impl<T, E: Display> ToResult<T> for std::result::Result<T, E> {
    fn to_result(self) -> std::io::Result<T> {
        // Wrap the displayed error message in an `ErrorKind::Other` error.
        self.map_err(|e| std::io::Error::other(e.to_string()))
    }
}
/// Converts an `Option` into an `io::Result`, using the supplied
/// message as the error text when the value is `None`.
pub(crate) trait ToResultErrorMessage<T> {
    fn to_result(self, msg: &str) -> std::io::Result<T>;
}

impl<T> ToResultErrorMessage<T> for Option<T> {
    fn to_result(self, msg: &str) -> std::io::Result<T> {
        // Only allocate the error message when the value is actually absent.
        self.ok_or_else(|| std::io::Error::other(msg.to_string()))
    }
}
/// Builds the on-disk filename for a downloaded page image.
///
/// The page number is zero-padded to five digits so files sort
/// lexicographically in page order, e.g. `00003-PA4.png`.
pub(crate) fn generate_image_filename(page_number: &usize, page_id: &str, ext: &str) -> String {
    format!("{page_number:0>5}-{page_id}.{ext}")
}
/// Determines the file extension for a downloaded image from the
/// response's `content-type` header.
///
/// Falls back to `"jpg"` when no content-type header is present.
/// `"jpeg"` is normalized to `"jpg"`. MIME parameters (e.g.
/// `image/png; charset=UTF-8`) are stripped before extracting the
/// subtype — previously they leaked into the extension, producing
/// filenames like `page.png; charset=UTF-8`.
///
/// # Errors
///
/// Returns an error when the header value is not valid UTF-8.
pub(crate) fn get_image_ext(res: &reqwest::blocking::Response) -> io::Result<String> {
    let Some(value) = res.headers().get("content-type") else {
        return Ok("jpg".to_string());
    };
    // Header bytes are not guaranteed to be valid UTF-8.
    let mime = value.to_str().to_result()?;
    // Drop any MIME parameters ("; charset=...") and surrounding whitespace.
    let mime = mime.split(';').next().unwrap_or(mime).trim();
    // The extension is the subtype: everything after the first '/'.
    let ext = match mime.split_once('/') {
        Some((_, subtype)) => subtype,
        None => mime,
    };
    Ok(if ext == "jpeg" { "jpg" } else { ext }.to_string())
}
pub(crate) fn try_download(url: &str, mut attempts: u32) -> io::Result<reqwest::blocking::Response> {
let indefinite = attempts == 0;
let mut res: io::Result<reqwest::blocking::Response> = Err(io::Error::other(""));
while indefinite || attempts > 0 {
res = reqwest::blocking::get(url).to_result();
if let Ok(res) = res {
return Ok(res);
}
if !indefinite {
attempts -= 1;
eprintln!("Download failed for {url}. {attempts} attempt(s) remaining...");
}
else{
eprintln!("Download failed for {url}. Retrying...");
}
}
res
}
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixtures: a fake volume id and a generic query string.
    const ID: &str = "FAKE_ID";
    const ARGS: &str = "a=aa&b=bb&c=1";

    // Old-style URLs carry the volume id in the `id` query parameter.
    #[test]
    fn old_url_parsing() {
        let url = std::format!("https://books.google.com/books?id={ID}&{ARGS}");
        assert_eq!(id_from_url(&url).unwrap().as_str(), ID);
    }

    // New-style URLs carry the volume id as the final path segment.
    #[test]
    fn new_url_parsing() {
        let url = std::format!("https://www.google.com/books/edition/_/{ID}?{ARGS}");
        assert_eq!(id_from_url(&url).unwrap().as_str(), ID);
    }

    // `url_from_id` uses the fallback TLD when no options are given,
    // and the TLD configured in `ScraperOptions` otherwise.
    #[test]
    fn url_fixing() {
        let url = url_from_id(ID, None);
        let expected = std::format!("https://books.google.us/books?id={ID}&hl=en");
        assert_eq!(url, expected);
        let tld = ".co.jp";
        let options = ScraperOptions {
            tld: tld.to_string(),
            ..Default::default()
        };
        let url = url_from_id(ID, Some(&options));
        let expected = std::format!("https://books.google{tld}/books?id={ID}&hl=en");
        assert_eq!(url, expected);
    }
}