olx/
search.rs

1use std::convert::TryInto;
2use scraper::{Html, Selector};
3
/// A single listing scraped from an OLX search results page.
///
/// All fields hold display text taken from the page; nothing is parsed into
/// numbers or dates. `title`, `price`, `location` and `date` are empty strings
/// when the corresponding element is missing from the listing markup.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Item {
    /// Link to the listing page.
    pub url: String,
    /// Listing title text.
    pub title: String,
    /// Price text as shown on the page (currency and formatting included).
    pub price: String,
    /// Location part of the combined "location - date" line, trimmed.
    pub location: String,
    /// Date part of the combined "location - date" line, trimmed; empty when
    /// the line contains no `" - "` separator.
    pub date: String,
}
13
14fn parse_html(html: &str, selector: &Selector) -> Vec<Item> {
15    let fragment = Html::parse_document(html);
16
17    fragment
18        .select(selector)
19        .map(|element| {
20            let url = format!("https://www.olx.bg{}", element.value().attr("href").unwrap_or_default());
21            let title = element.select(&Selector::parse("h6.css-16v5mdi").unwrap()).next().map(|e| e.text().collect()).unwrap_or_default();
22            let price = element.select(&Selector::parse("p.css-10b0gli").unwrap()).next().map(|e| e.text().collect()).unwrap_or_default();
23            let location_date: String = element.select(&Selector::parse("p.css-veheph").unwrap()).next().map(|e| e.text().collect()).unwrap_or_default();
24            let (location, date) = location_date.split_once(" - ").unwrap_or((location_date.trim(), ""));
25
26            Item {
27                url,
28                title,
29                price,
30                location: location.trim().to_string(),
31                date: date.trim().to_string(),
32            }
33        })
34        .collect()
35}
36
37fn make_request(query: &str, category: Option<&str>, min_price: Option<&str>, max_price: Option<&str>, page: u32, sort: Option<&str>) -> Result<String, reqwest::Error> {
38    let base_url = "https://www.olx.bg/ads/";
39    let query_string = format!(
40        "{}/q-{}?page={}",
41        category.map_or_else(String::new, |cat| cat.to_string()),
42        query,
43        page
44    );
45
46    let query_string = if let Some(min) = min_price {
47        format!("{}&search[filter_float_price:from]={}", query_string, min)
48    } else {
49        query_string
50    };
51
52    let query_string = if let Some(max) = max_price {
53        format!("{}&search[filter_float_price:to]={}", query_string, max)
54    } else {
55        query_string
56    };
57
58    let query_string = if let Some(srt) = sort {
59        let order = match srt {
60            "1" => "relevance:desc",
61            "2" => "relevance:asc",
62            "3" => "created_at:desc",
63            "4" => "created_at:asc",
64            "5" => "filter_float_price:desc",
65            "6" => "filter_float_price:asc",
66            _ => std::process::exit(1), // Handle unknown sort option
67        };
68        format!("{}&search[order]={}", query_string, order)
69    } else {
70        query_string
71    };
72
73    let full_url = format!("{base_url}{query_string}", base_url = base_url, query_string = query_string).replace(' ', "-");
74
75    let response = reqwest::blocking::get(full_url)?;
76    let body = response.text().map_err(Into::into)?;
77
78    Ok(body)
79}
80
81#[must_use]
82pub fn new(query: String, category: Option<String>, min_price: Option<String>, max_price: Option<String>, end_page: Option<String>, sort: Option<&str>) -> Vec<Item> {
83    let mut items = Vec::new();
84    let mut current_page = 1;
85
86    let selector = Selector::parse("a.css-rc5s2u").unwrap();
87
88    while let Some(end_page_int) = end_page.as_deref().and_then(|s| s.parse().ok()) {
89        if current_page > end_page_int {
90            break;
91        }
92
93        match make_request(&query, category.as_deref(), min_price.as_deref(), max_price.as_deref(), current_page.try_into().unwrap(), sort) {
94            Ok(html) => {
95                let parsed_items = parse_html(&html, &selector);
96                items.extend(parsed_items);
97
98                let has_next_page = html.contains("data-testid=\"pagination-forward\"");
99                if !has_next_page {
100                    println!("[ - ] No next page after page {}", current_page);
101                    break;
102                }
103
104                println!("[ + ] Went to page {}", current_page);
105                current_page += 1;
106            }
107            Err(err) => {
108                println!("[ ! ] Error fetching page {}: {:?}", current_page, err);
109                break;
110            }
111        }
112    }
113
114    items
115}