1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
use futures::TryStreamExt;
use regex::Regex;
use serde::{Deserialize, Serialize};
use url::Url;

use std::ops::RangeBounds;
use std::path::Path;

/// A single downloadable representation of a page.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Resource {
    /// MIME type of the resource, e.g. `image/jpeg`.
    pub mime: String,
    /// Location the resource can be fetched from.
    pub url: url::Url,
}

/// One page of an item together with its available resources.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Page {
    /// Representations available for this page.
    pub resources: Vec<Resource>,
    /// Numeric identifier of the page.
    pub id: u32,
}

/// Entity description as deserialized from the `/api/entities/<id>/` response.
#[derive(Debug, Clone, Deserialize)]
pub struct Item {
    /// Pages belonging to the item.
    pub scans: Vec<Page>,
}

/// Errors returned by the functions in this module.
///
/// Every variant wraps the underlying error and displays that error's own
/// message unchanged.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// I/O failure, e.g. while creating or writing the output file or while
    /// streaming a response body.
    #[error("{0}")]
    Io(#[from] std::io::Error),
    /// Failure reported by `reqwest` while performing a request or decoding
    /// its response.
    #[error("{0}")]
    Http(#[from] reqwest::Error),
    /// JSON (de)serialization failure reported by `serde_json`.
    // NOTE(review): no function visible in this part of the file produces this
    // variant; presumably it is used by callers elsewhere -- confirm.
    #[error("{0}")]
    Serde(#[from] serde_json::Error),
}

#[must_use]
pub fn resource_id_from_url(url: &Url) -> String {
    Regex::new(r"item/([a-zA-Z0-9-]+,)?([a-zA-Z0-9]+)")
        .expect("correct regexp")
        .captures(url.path())
        .unwrap()
        .get(2)
        .unwrap()
        .as_str()
        .to_string()
}

/// Fetches the pages of the item that `url` points to.
///
/// The item's resource id is taken from `url`, the entity description is
/// requested from `/api/entities/<id>/` on the same host, and the pages whose
/// positions fall inside `range` are returned. Each returned page keeps only
/// its `image/jpeg` resources.
///
/// `range` is clamped to the pages that actually exist: a range reaching past
/// the last page yields the pages that are available, and an empty or inverted
/// range yields an empty vector.
///
/// # Errors
///
/// Returns [`Error::Http`] if the request fails, the server answers with a
/// non-success status, or the response body cannot be decoded.
///
/// # Panics
///
/// Panics if `url` has no `item/<id>` path segment (see
/// [`resource_id_from_url`]).
pub async fn list_pages<R>(url: &Url, range: R) -> Result<Vec<Page>, Error>
where
    R: RangeBounds<usize>,
{
    use std::ops::Bound;

    let mut url = url.clone();
    let resource_id = resource_id_from_url(&url);
    url.set_path(&format!("/api/entities/{}/", resource_id));

    // Reject 4xx/5xx responses up front so callers get the HTTP status rather
    // than a JSON decoding error for an error page.
    let item: Item = reqwest::get(url.as_str())
        .await?
        .error_for_status()?
        .json()
        .await?;

    // Translate the bounds into a half-open `[first, end)` window.
    let first = match range.start_bound() {
        Bound::Included(&start) => start,
        Bound::Excluded(&start) => start.saturating_add(1),
        Bound::Unbounded => 0,
    };
    let end = match range.end_bound() {
        Bound::Included(&end) => end.saturating_add(1),
        Bound::Excluded(&end) => end,
        Bound::Unbounded => usize::MAX,
    };

    // `skip`/`take` clamp to the available pages, whereas `Vec::drain` would
    // panic on an out-of-bounds or inverted range.
    Ok(item
        .scans
        .into_iter()
        .skip(first)
        .take(end.saturating_sub(first))
        .map(|mut page| {
            page.resources
                .retain(|resource| resource.mime == "image/jpeg");
            page
        })
        .collect())
}

pub async fn download_page_image(url: &Url, output_path: &Path) -> Result<(), Error> {
    use async_compat::CompatExt;
    use tokio::io::AsyncWriteExt;

    let mut file = tokio::fs::OpenOptions::new()
        .append(true)
        .create_new(true)
        .open(output_path)
        .await?;

    let mut response = reqwest::get(url.as_str())
        .await?
        .bytes_stream()
        .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
        .into_async_read();

    futures::io::copy(&mut response, &mut file.compat_mut()).await?;

    file.flush().await?;

    Ok(())
}