use reqwest::blocking::Client;
use scraper::{Html, Selector};
use std::error::Error;
use url::Url;
/// Downloads the HTML page at `url` and returns the absolute URL of an archive link on it.
///
/// Every `<a>` element with an `href` attribute is inspected. A link qualifies when its
/// `href` contains `matching` and ends with `zip` or `7z`. When several links qualify,
/// the last one in document order is returned.
///
/// An `href` that already starts with `http://` or `https://` is returned verbatim.
/// Any other `href` (path-relative, root-relative or protocol-relative) is resolved
/// against `url` with [`Url::join`], so the page's port and directory are honoured.
///
/// # Errors
///
/// Returns an error when the HTTP request or body decoding fails, when no link
/// qualifies (`"No matching source found"`), or when `url` cannot be parsed while
/// resolving a relative link.
pub(crate) fn get_data_source(
    url: &str,
    matching: &str,
) -> Result<String, Box<dyn Error + Send + Sync>> {
    let body = Client::new().get(url).send()?.text()?;
    let document = Html::parse_document(&body);
    let selector = Selector::parse("a").expect("`a` is a valid CSS selector");

    // `last()` keeps the historical behaviour: a later matching link overrides earlier ones.
    let href = document
        .select(&selector)
        .filter_map(|element| element.value().attr("href"))
        .filter(|href| href.contains(matching) && (href.ends_with("zip") || href.ends_with("7z")))
        .last()
        .ok_or("No matching source found")?;

    if href.starts_with("http://") || href.starts_with("https://") {
        return Ok(href.to_string());
    }

    // Let the URL parser do the reference resolution instead of concatenating
    // `scheme://host` by hand, which lost the port and the page's directory and
    // could glue the host and a relative path together without a separator.
    let resolved = Url::parse(url)?.join(href)?;
    Ok(resolved.to_string())
}
/// Extracts a file name from `url`.
///
/// Takes the text after the last `/` (or the whole input when there is no `/`),
/// then the text after the last `=` within that segment (or the whole segment
/// when there is no `=`). The result is empty when the input ends with `/` or `=`.
pub(crate) fn get_file_name_from_url(url: &str) -> String {
    let segment = url.rsplit_once('/').map_or(url, |(_, tail)| tail);
    let name = segment.rsplit_once('=').map_or(segment, |(_, tail)| tail);
    name.to_owned()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::error::Error;

    /// Page queried by the `get_data_source` tests. `get_data_source` performs a
    /// real HTTP GET, so these tests need network access to this site.
    const LANGUAGES_PAGE: &str = "https://www.progettosnaps.net/languages";

    /// A link containing the pattern and ending in an archive extension is returned.
    #[test]
    fn test_get_data_source_valid() -> Result<(), Box<dyn Error + Send + Sync>> {
        let pattern = "download";
        let outcome = get_data_source(LANGUAGES_PAGE, pattern);
        assert!(outcome.is_ok());
        let link = outcome.unwrap();
        assert!(link.contains(pattern));
        assert!(link.ends_with("zip") || link.ends_with("7z"));
        Ok(())
    }

    /// A pattern that matches no link yields the dedicated error message.
    #[test]
    fn test_get_data_source_no_match() -> Result<(), Box<dyn Error + Send + Sync>> {
        let outcome = get_data_source(LANGUAGES_PAGE, "nonexistentfile");
        assert!(outcome.is_err());
        let failure = outcome.unwrap_err();
        assert_eq!(failure.to_string(), "No matching source found");
        Ok(())
    }

    /// The last path segment is used as the file name.
    #[test]
    fn test_get_file_name_basic() {
        assert_eq!(
            get_file_name_from_url("https://example.com/downloads/file.zip"),
            "file.zip"
        );
    }

    /// With a `key=value` tail, only the value after `=` is kept.
    #[test]
    fn test_get_file_name_with_query_params() {
        assert_eq!(
            get_file_name_from_url("https://example.com/download?file=file.zip"),
            "file.zip"
        );
    }

    /// A file placed directly under the host is extracted as well.
    #[test]
    fn test_get_file_name_no_slash() {
        assert_eq!(
            get_file_name_from_url("https://example.com/file.zip"),
            "file.zip"
        );
    }

    /// Empty input produces an empty file name.
    #[test]
    fn test_get_file_name_empty_string() {
        assert_eq!(get_file_name_from_url(""), "");
    }

    /// A URL ending in `/` has no file name.
    #[test]
    fn test_get_file_name_no_file_name() {
        assert_eq!(
            get_file_name_from_url("https://example.com/downloads/"),
            ""
        );
    }
}