1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
use crate::{Sauce, SauceItem, SauceResult, SauceError};
use async_trait::async_trait;
use reqwest::{header, Client};
use select::document::Document;
use select::predicate::*;

/// Root address of the IQDB service; search requests are made against it with
/// the image location passed in the `url` query parameter.
const BASE_ADDRESS: &str = "https://iqdb.org/";

/// Gets sauces from iqdb.org
///
/// This is a stateless unit type: all behaviour lives in its `Sauce`
/// implementation, which asks the site to search for an image by URL and
/// scrapes the matches out of the HTML page it returns.
#[derive(Debug)]
pub struct IQDB;

#[async_trait]
impl Sauce for IQDB {
    async fn build_url(&self, url: &str) -> Result<String, SauceError> {
        Ok(format!("{}?url={}", BASE_ADDRESS, url))
    }

    async fn check_sauce(&self, original_url: String) -> Result<SauceResult, SauceError> {
        let url = &original_url;
        let cli = Client::new();

        let head = cli
            .head(&original_url)
            .send()
            .await?;

        let content_type = head.headers().get(header::CONTENT_TYPE);
        if let Some(content_type) = content_type {
            let content_type = content_type
                .to_str()?;
            if !content_type.contains("image") {
                return Err(SauceError::LinkIsNotImage);
            }
        }

        let resp = cli
            .get(&self.build_url(url).await?)
            .send()
            .await?;

        let resp = resp
            .text()
            .await?;

        let html = Document::from(resp.as_str());

        let mut status = html.find(Attr("id", "urlstat"));

        if let Some(status) = status.next() {
            if !status.text().trim().starts_with("OK, ") {
                return Err(SauceError::GenericString(format!("Unable to retrieve sauce: {}", status.text())));
            }
        }

        let mut res = SauceResult {
            original_url: url.to_string(),
            ..Default::default()
        };

        let mut pages = html.find(Attr("id", "pages"));

        let mut first = false;

        let pages = pages.next();

        if let Some(pages) = pages {
            for node in pages.children() {
                if !first {
                    first = true;
                    continue;
                }
                let mut item = SauceItem::default();

                for (idx, node) in node.find(Name("tr")).enumerate() {
                    match idx {
                        0 | 2..=3 => continue,
                        1 => {
                            let td =
                                match node.first_child() {
                                    Some(node) => Ok(node),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            let link =
                                match td.first_child() {
                                    Some(node) => Ok(node),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            let href =
                                match link.attr("href") {
                                    Some(href) => Ok(href.to_string()),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            item.link = if href.starts_with("//") {
                                "https:".to_string() + &href
                            } else {
                                href
                            };
                        }
                        4 => {
                            let td =
                                match node.first_child() {
                                    Some(node) => Ok(node),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            let text = td.text();
                            let similarity = text.split('%').collect::<Vec<&str>>()[0];
                            item.similarity = match similarity.parse::<f64>() {
                                Ok(similarity) => Ok(similarity),
                                Err(e) => Err(SauceError::UnableToConvertToFloat(e)),
                            }?;
                        }
                        _ => break,
                    }
                }

                if item.link.is_empty() {
                    continue;
                }

                res.items.push(item);
            }
        }

        let pages = html.find(Attr("id", "more1")).next();

        if let Some(pages) = pages {
            let real_pages = match pages.find(Class("pages")).next() {
                Some(real_pages) => Ok(real_pages),
                None => Err(SauceError::UnableToRetrieve("failed to parse page")),
            }?;

            for node in real_pages.children() {
                let mut item = SauceItem::default();

                for (idx, node) in node.find(Name("tr")).enumerate() {
                    match idx {
                        0 => {
                            let td =
                                match node.first_child() {
                                    Some(node) => Ok(node),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            let link =
                                match td.first_child() {
                                    Some(node) => Ok(node),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            let href =
                                match link.attr("href") {
                                    Some(href) => Ok(href.to_string()),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            item.link = if href.starts_with("//") {
                                "https:".to_string() + &href
                            } else {
                                href
                            };
                        }
                        1..=2 => continue,
                        3 => {
                            let td =
                                match node.first_child() {
                                    Some(node) => Ok(node),
                                    None => Err(SauceError::UnableToRetrieve("failed to parse page")),
                                }?;
                            let text = td.text();
                            let similarity = text.split('%').collect::<Vec<&str>>()[0];
                            item.similarity = match similarity.parse::<f64>() {
                                Ok(similarity) => Ok(similarity),
                                Err(e) => Err(SauceError::UnableToConvertToFloat(e)),
                            }?;
                        }
                        _ => break,
                    }
                }

                if item.link.is_empty() {
                    continue;
                }

                res.items.push(item);
            }
        }

        Ok(res)
    }
}