booru_dl/
api.rs

1//! A core module for interacting with the Gelbooru API.
2//!
//! In most cases, prefer the [`BatchGetter`] struct for fetching [`data`] from the Gelbooru API.
4
5use std::path::PathBuf;
6use std::sync::LazyLock;
7
8use reqwest::{Client, Url};
9use serde::{Deserialize, Serialize};
10
11/// The URLs for the Gelbooru API.
12pub mod url {
13    use super::*;
14
15    /// The base URL of the Gelbooru.
16    pub const BASE_URL: &str = "https://gelbooru.com/index.php";
17
18    /// The Api URL of the Gelbooru, which can be used to query gelbooru's database.
19    pub static API_URL: LazyLock<Url> = LazyLock::new(|| {
20        // see: https://gelbooru.com/index.php?page=wiki&s=view&id=18780
21        Url::parse_with_params(
22            BASE_URL,
23            &[
24                ("page", "dapi"),
25                ("s", "post"),
26                ("q", "index"),
27                ("json", "1"),
28            ],
29        )
30        .unwrap()
31    });
32
33    /// The Post URL of the Gelbooru, which can be used to display the images.
34    pub static POST_URL: LazyLock<Url> = LazyLock::new(|| {
35        // see: https://gelbooru.com/index.php?page=wiki&s=view&id=18780
36        Url::parse_with_params(BASE_URL, &[("page", "post"), ("s", "list"), ("q", "index")])
37            .unwrap()
38    });
39}
40
41/// This struct is used to auto initialize the `filename` field for the `Post` struct.
42#[derive(Deserialize)]
43pub(crate) struct PostInner {
44    pub(crate) id: u64,
45    pub(crate) md5: String,
46    pub(crate) file_url: String,
47    pub(crate) tags: String,
48    pub(crate) image: PathBuf,
49}
50
51impl From<PostInner> for data::field::Post {
52    /// `filename` equals to `id` with `image`'s extension.
53    /// e.g. `id = 12345`, `image = "test.jpg"`, then `filename = "12345.jpg"`.
54    fn from(value: PostInner) -> Self {
55        use crate::tool::SetFileStem;
56
57        // make sure only the filename is retained
58        let mut filename: PathBuf = value.image.file_name().unwrap().into();
59        filename.set_file_stem(value.id.to_string());
60
61        Self {
62            id: value.id,
63            md5: value.md5,
64            file_url: value.file_url,
65            tags: value.tags,
66            image: value.image,
67            filename,
68        }
69    }
70}
71
72/// The data structure for the JSON response from the Gelbooru API.
73pub mod data {
74    use super::*;
75
76    /// The fields of the [`Json`] response.
77    pub mod field {
78        use super::*;
79
80        /// The attributes field of the JSON response.
81        #[non_exhaustive]
82        #[derive(Debug, Deserialize, Serialize)]
83        pub struct Attributes {
84            /// The number of images in this response. Range: `0..=100`.
85            pub limit: u64,
86            /// The current index of the first image in this response.
87            pub offset: u64,
88            /// The total number of images in the gelbooru.
89            pub count: u64,
90        }
91
92        /// The post field of the JSON response.
93        #[non_exhaustive]
94        #[derive(Debug, Deserialize, Serialize)]
95        #[serde(from = "PostInner")]
96        pub struct Post {
97            /// The ID of the image.
98            pub id: u64,
99            /// The MD5 hash of the image.
100            pub md5: String,
101            /// The URL of the image, which can be used to download the image.
102            pub file_url: String,
103            /// The tags of the image. Note: these tags are marked by gelbooru.
104            pub tags: String,
105            /// The original file name of the image.
106            pub image: PathBuf,
107            /// The filename of the image, which is the same as `id` with the extension of `image`.
108            /// We will use this field to save the image.
109            pub(crate) filename: PathBuf,
110        }
111    }
112
113    /// The JSON structure response from the Gelbooru API.
114    #[non_exhaustive]
115    #[derive(Debug, Deserialize, Serialize)]
116    pub struct Json {
117        #[serde(rename = "@attributes")]
118        /// The attributes of the JSON response.
119        pub attributes: field::Attributes,
120        /// The posts of the JSON response.
121        /// if `attributes.count` is `0`, or `attributes.pid` is out of range,
122        /// this field will be `None`.
123        pub post: Option<Vec<field::Post>>,
124    }
125}
126
127/// A Consuming-Builders style function to get the data from the Gelbooru API.
128///
129/// # Example
130///
131/// ```rust
132/// use reqwest::Client;
133/// use booru_dl::api::Getter;
134///
135/// #[tokio::main]
136/// async fn main() -> reqwest::Result<()> {
137///     let client = Client::new();
138///     let tags = "cat";
139///     let limit = 10;
140///     let pid = 0;
141///
142///     let data = Getter::build(&client, &tags, limit, pid)
143///         .expect("illegal arguments")
144///         .run()
145///         .await?;
146///
147///     Ok(())
148/// }
149/// ```
150pub struct Getter<'a> {
151    client: &'a Client,
152    tags: &'a str,
153    limit: u64,
154    pid: u64,
155}
156
157impl Getter<'_> {
158    /// See <https://gelbooru.com/index.php?page=wiki&s=view&id=18780> for arguments.
159    ///
160    /// # Errors
161    ///
162    /// If `tags` is empty, or `limit` is not in the range `1..=100`, this function will return an error.
163    pub fn build<'a>(
164        client: &'a Client,
165        tags: &'a str,
166        limit: u64,
167        pid: u64,
168    ) -> anyhow::Result<Getter<'a>> {
169        if tags.is_empty() {
170            return Err(anyhow::anyhow!("Tags cannot be empty"));
171        }
172        // This is gelbooru's limit.
173        // see: https://gelbooru.com/index.php?page=wiki&s=view&id=18780
174        if !matches!(limit, 1..=100) {
175            return Err(anyhow::anyhow!("Limit can only be between 1 and 100"));
176        }
177        Ok(Getter {
178            client,
179            tags,
180            limit,
181            pid,
182        })
183    }
184
185    /// Send the request to the Gelbooru API and get the JSON response.
186    ///
187    /// # Errors
188    ///
189    /// If the request fails, this function will return an error.
190    ///
191    /// <div class="warning">
192    ///
193    /// If `limit * pid > 20_000`, the API will return an error.
194    ///
195    /// See: <https://gelbooru.com/index.php?page=forum&s=view&id=1549>
196    ///
197    /// </div>
198    pub async fn run(self) -> reqwest::Result<data::Json> {
199        let mut target_url = url::API_URL.clone();
200        target_url.query_pairs_mut().extend_pairs([
201            ("tags", self.tags),
202            ("limit", &self.limit.to_string()),
203            ("pid", &self.pid.to_string()),
204        ]);
205        self.client.get(target_url).send().await?.json().await
206    }
207}
208
209/// This helper wraps the [`Getter`] struct and automatically polls the API until the number of images is reached.
210///
211/// # Example
212///
213/// See [`Getter#example`] for example usage.
214pub struct BatchGetter<'a> {
215    client: &'a Client,
216    tags: &'a str,
217    num_imgs: u64,
218}
219
220impl BatchGetter<'_> {
221    /// See [`Getter::build`] for arguments.
222    ///
223    /// # Errors
224    ///
225    /// If `tags` is empty, or `num_imgs` is 0, this function will return an error.
226    pub fn build<'a>(
227        client: &'a Client,
228        tags: &'a str,
229        num_imgs: u64,
230    ) -> anyhow::Result<BatchGetter<'a>> {
231        if tags.is_empty() {
232            return Err(anyhow::anyhow!("Tags cannot be empty"));
233        }
234        if num_imgs == 0 {
235            return Err(anyhow::anyhow!("Number of images cannot be 0"));
236        }
237        Ok(BatchGetter {
238            client,
239            tags,
240            num_imgs,
241        })
242    }
243
244    /// Wraps the [`Getter`] struct and automatically polls the API until the number of images is reached.
245    ///
246    /// If none of the images are found, this function will return an zero capacity vector.
247    ///
248    /// # Errors
249    ///
250    /// If the request fails, this function will return an error.
251    ///
252    /// <div class="warning">
253    ///
254    /// If `num_imgs > 20_000`, the API will return an error.
255    ///
256    /// See: <https://gelbooru.com/index.php?page=forum&s=view&id=1549>
257    ///
258    /// </div>
259    pub async fn run(self) -> reqwest::Result<Vec<data::field::Post>> {
260        const LIMIT: u64 = 100;
261
262        let Self {
263            client,
264            tags,
265            num_imgs,
266        } = self;
267
268        let mut current_pid = 0;
269        let data = Getter::build(client, tags, LIMIT, current_pid)
270            .unwrap()
271            .run()
272            .await?;
273
274        let mut post_vec = match data.post {
275            Some(post) => post,
276            None => return Ok(Vec::with_capacity(0)),
277        };
278        let total_num: usize = std::cmp::min(num_imgs, data.attributes.count)
279            .try_into()
280            .expect("total number is too large to convert to `usize`");
281        // if `total_num` is 0, then `data.attributes.count` is 0,
282        // so `data.post` should be `None` and return early.
283        debug_assert_ne!(total_num, 0);
284
285        while post_vec.len() < total_num {
286            current_pid += 1;
287            let current_post_vec = Getter::build(client, tags, LIMIT, current_pid)
288                .unwrap()
289                .run()
290                .await?
291                .post
292                .expect(
293                    "if `post_vec` is shorter than `total_num`, \
294                    then `post` should not be `None`",
295                );
296            post_vec.extend(current_post_vec);
297        }
298        post_vec.truncate(total_num);
299
300        Ok(post_vec)
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// `Getter::build` rejects empty tags and limits outside `1..=100`,
    /// and accepts both boundaries of that range.
    #[test]
    fn test_illegal_args() {
        let client = Client::new();

        let resp = Getter::build(&client, "", 100, 0);
        assert!(resp.is_err());

        // Both sides of the allowed `limit` range.
        let resp = Getter::build(&client, "cat", 0, 0);
        assert!(resp.is_err());
        let resp = Getter::build(&client, "cat", 101, 0);
        assert!(resp.is_err());

        // The boundaries themselves are legal.
        assert!(Getter::build(&client, "cat", 1, 0).is_ok());
        assert!(Getter::build(&client, "cat", 100, 0).is_ok());
    }

    /// `BatchGetter::build` rejects empty tags and a zero image count.
    #[test]
    fn test_batch_illegal_args() {
        let client = Client::new();

        assert!(BatchGetter::build(&client, "", 1).is_err());
        assert!(BatchGetter::build(&client, "cat", 0).is_err());
        assert!(BatchGetter::build(&client, "cat", 1).is_ok());
    }

    /// Sends one real request to the Gelbooru API (requires network access).
    #[tokio::test]
    async fn test_get_api_data() -> reqwest::Result<()> {
        let client = Client::new();
        let tag = "cat";
        let limit = 10;

        let resp = Getter::build(&client, tag, limit, 0).unwrap().run().await?;
        assert_eq!(resp.attributes.limit, limit);

        let posts = resp
            .post
            .expect("if `attributes.limit` is correct, then `post` shouldn't be `None`");
        assert!(posts[0].tags.contains(tag));
        Ok(())
    }

    /// Sends real requests to the Gelbooru API (requires network access).
    #[tokio::test]
    async fn test_batch_get_api_data() -> reqwest::Result<()> {
        let client = Client::new();
        let tag = "cat";
        // One more than a full page, so that a second request is needed.
        let num_imgs = 101;

        let resp = BatchGetter::build(&client, tag, num_imgs)
            .unwrap()
            .run()
            .await?;
        assert_eq!(resp.len(), usize::try_from(num_imgs).unwrap());

        // A tag without any matching post yields an empty result, not an error.
        let tag = "balabala just no exist";
        let resp = BatchGetter::build(&client, tag, num_imgs)
            .unwrap()
            .run()
            .await?;
        assert!(resp.is_empty());
        Ok(())
    }
}