1use scraper::{ElementRef, Html, Selector};
2use std::sync::OnceLock;
3
4use crate::error::{Error, Result};
5use crate::model::{Size, Torrent};
6
7static ITEM_SELECTOR: OnceLock<Selector> = OnceLock::new();
8static TITLE_SELECTOR: OnceLock<Selector> = OnceLock::new();
9static TORRENT_LINK_SELECTOR: OnceLock<Selector> = OnceLock::new();
10static MAGNET_SELECTOR: OnceLock<Selector> = OnceLock::new();
11static SEEDERS_SELECTOR: OnceLock<Selector> = OnceLock::new();
12static LEECHERS_SELECTOR: OnceLock<Selector> = OnceLock::new();
13static DATE_SELECTOR: OnceLock<Selector> = OnceLock::new();
14static DOWNLOADS_SELECTOR: OnceLock<Selector> = OnceLock::new();
15static SIZE_SELECTOR: OnceLock<Selector> = OnceLock::new();
16
17pub fn extract(html: &str, base_url: &str) -> Result<Vec<Torrent>> {
18 let document = Html::parse_document(html);
19 let selector = ITEM_SELECTOR
20 .get_or_init(|| Selector::parse("table>tbody>tr").unwrap());
21 let items = document.select(selector);
22 let mut res_vec: Vec<Torrent> = Vec::with_capacity(75);
23
24 for item in items {
25 let title = extract_title(item)?;
26 let torrent_link = extract_torrent_link(item, base_url)?;
27 let magnet = extract_magnet_url(item)?;
28 let seeders = extract_seeders(item)?;
29 let leechers = extract_leechers(item)?;
30 let downloads = extract_downloads(item)?;
31 let size = extract_size(item)?;
32 let date = extract_date(item)?;
33 let torrent = Torrent {
34 title,
35 link: torrent_link,
36 magnet_url: magnet,
37 date,
38 seeders,
39 leechers,
40 downloads,
41 size,
42 };
43 res_vec.push(torrent);
44 }
45 Ok(res_vec)
46}
47
48fn extract_title(item: ElementRef<'_>) -> Result<String> {
49 let selector = TITLE_SELECTOR.get_or_init(|| {
50 Selector::parse("td:nth-of-type(2)>a:last-child").unwrap()
51 });
52 let title = item
53 .select(selector)
54 .next()
55 .ok_or(Error::SelectorError("Title not found".into()))?;
56 Ok(title.text().collect())
57}
58
59fn extract_torrent_link(
60 item: ElementRef<'_>,
61 base_url: &str,
62) -> Result<String> {
63 let selector = TORRENT_LINK_SELECTOR.get_or_init(|| {
64 Selector::parse("td:nth-of-type(3)>a:first-child").unwrap()
65 });
66 let link = item
67 .select(selector)
68 .next()
69 .ok_or(Error::SelectorError("Link not found".into()))?;
70 link.value()
71 .attr("href")
72 .ok_or(Error::SelectorError("Link not found".into()))
73 .map(|s| format!("{}{}", base_url, s))
74}
75
76fn extract_magnet_url(item: ElementRef<'_>) -> Result<String> {
77 let selector = MAGNET_SELECTOR.get_or_init(|| {
78 Selector::parse("td:nth-of-type(3)>a:last-child").unwrap()
79 });
80 let link = item
81 .select(selector)
82 .next()
83 .ok_or(Error::SelectorError("magnet not found".into()))?;
84 link.value()
85 .attr("href")
86 .ok_or(Error::SelectorError("magnet not found".into()))
87 .map(|s| s.to_string())
88}
89
90fn extract_seeders(item: ElementRef<'_>) -> Result<u32> {
91 let selector = SEEDERS_SELECTOR
92 .get_or_init(|| Selector::parse("td:nth-of-type(6)").unwrap());
93 let seeders = item
94 .select(selector)
95 .next()
96 .ok_or(Error::SelectorError("Seeders not found".into()))?;
97 let seeders_str: String = seeders.text().collect();
98 seeders_str
99 .parse::<u32>()
100 .map_err(|_| Error::SelectorError("Seeders not found".into()))
101}
102
103fn extract_leechers(item: ElementRef<'_>) -> Result<u32> {
104 let selector = LEECHERS_SELECTOR
105 .get_or_init(|| Selector::parse("td:nth-of-type(7)").unwrap());
106 let leechers = item
107 .select(selector)
108 .next()
109 .ok_or(Error::SelectorError("Leechers not found".into()))?;
110 let leechers_str: String = leechers.text().collect();
111 leechers_str
112 .parse::<u32>()
113 .map_err(|_| Error::SelectorError("Leechers not found".into()))
114}
115
116fn extract_downloads(item: ElementRef<'_>) -> Result<u32> {
117 let selector = DOWNLOADS_SELECTOR
118 .get_or_init(|| Selector::parse("td:nth-of-type(8)").unwrap());
119 let downloads = item
120 .select(selector)
121 .next()
122 .ok_or(Error::SelectorError("Downloads not found".into()))?;
123 let downloads_str: String = downloads.text().collect();
124 downloads_str
125 .parse::<u32>()
126 .map_err(|_| Error::SelectorError("Downloads not found".into()))
127}
128
129fn extract_size(item: ElementRef<'_>) -> Result<Size> {
130 let selector = SIZE_SELECTOR
131 .get_or_init(|| Selector::parse("td:nth-of-type(4)").unwrap());
132 let size = item
133 .select(selector)
134 .next()
135 .ok_or(Error::SelectorError("Size not found".into()))?;
136 size.text().collect::<String>().parse()
137}
138
139fn extract_date(item: ElementRef<'_>) -> Result<chrono::DateTime<chrono::Utc>> {
140 let selector = DATE_SELECTOR
141 .get_or_init(|| Selector::parse("td:nth-of-type(5)").unwrap());
142 let date = item
143 .select(selector)
144 .next()
145 .ok_or(Error::SelectorError("Date not found".into()))?;
146 date.attr("data-timestamp")
147 .ok_or(Error::SelectorError("Date not found".into()))
148 .map(|s| {
149 let time_stamp = s.parse::<i64>().unwrap();
150 let ts_in_millis = time_stamp * 1000;
151 chrono::DateTime::from_timestamp_millis(ts_in_millis)
152 .unwrap_or_default()
153 })
154}