use crate::application::services::content::content_fetching_service::ContentFetchingService;
use crate::core::platform::container::content::{ContentItem, ContentType, TextContent};
use url::Url;
/// Content fetcher that retrieves pages over HTTP with a blocking `reqwest` client.
///
/// This type implements `ContentFetchingService`; the derived `Clone` copies
/// the wrapped client handle along with the fetcher.
#[doc(hidden)]
#[derive(Debug, Clone)]
pub struct HttpContentFetcher {
/// Blocking HTTP client used to issue the GET requests.
client: reqwest::blocking::Client,
}
impl HttpContentFetcher {
    /// Builds a fetcher around a freshly constructed blocking `reqwest` client.
    pub fn new() -> Self {
        let client = reqwest::blocking::Client::new();
        Self { client }
    }
}
impl Default for HttpContentFetcher {
fn default() -> Self {
Self::new()
}
}
/// [`ContentFetchingService`] implementation that performs a blocking HTTP GET.
impl ContentFetchingService for HttpContentFetcher {
    /// Downloads `url` and wraps the response body in a text [`ContentItem`].
    ///
    /// On success the returned item has its URL and source URL set to the
    /// parsed form of `url`, its source set to `"web"`, and its tags set to
    /// `["web"]`. If a title can be extracted from the body, it is set as the
    /// item's title; otherwise the title is left untouched.
    ///
    /// # Errors
    ///
    /// Returns a descriptive `String` when `url` cannot be parsed, the request
    /// fails, the response status is not a success status, the body cannot be
    /// read, or the content types reject the data.
    fn fetch_content(&self, url: &str) -> Result<ContentItem, String> {
        // Reject malformed input before any network traffic happens.
        let target = match Url::parse(url) {
            Ok(parsed) => parsed,
            Err(e) => return Err(format!("Invalid URL: {}", e)),
        };

        let request = self.client.get(url);
        let response = request
            .send()
            .map_err(|e| format!("Failed to fetch URL: {}", e))?;

        let status = response.status();
        if !status.is_success() {
            return Err(format!("HTTP error: {}", status));
        }

        let body = response
            .text()
            .map_err(|e| format!("Failed to read response body: {}", e))?;

        let text = TextContent::new(None, Some(body))
            .map_err(|e| format!("Failed to create text content: {}", e))?;
        let mut item = ContentItem::new(ContentType::Text(text))
            .map_err(|e| format!("Failed to create content item: {}", e))?;

        item.set_url(Some(target.clone()));
        item.set_source_url(Some(target));

        // Compute the title as an owned value first so the shared borrow of
        // the item has ended before the item is mutated.
        let title = if let ContentType::Text(text) = item.content() {
            text.content
                .as_ref()
                .and_then(|html| extract_title_from_html(html))
        } else {
            None
        };
        if let Some(title) = title {
            item.set_title(Some(title));
        }

        item.set_source(Some(String::from("web")));
        item.set_tags(Some(vec![String::from("web")]));
        Ok(item)
    }
}
fn extract_title_from_html(html: &str) -> Option<String> {
let title_regex = regex::Regex::new(r"<title[^>]*>([^<]*)</title>").ok()?;
title_regex
.captures(html)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str().trim().to_string())
.filter(|s| !s.is_empty())
}
#[cfg(test)]
mod tests {
    use super::*;
    use mockito::Server;

    /// A successful HTML response becomes a text item titled from `<title>`.
    #[test]
    fn test_fetch_content_success() {
        let mut server = Server::new();
        let mock = server
            .mock("GET", "/test")
            .with_status(200)
            .with_header("content-type", "text/html")
            .with_body(
                "<html><head><title>Test Page</title></head><body>Test content</body></html>",
            )
            .create();

        let endpoint = format!("{}/test", server.url());
        let item = HttpContentFetcher::new()
            .fetch_content(&endpoint)
            .expect("fetch against the mock server should succeed");

        let title = item
            .title()
            .expect("title should be extracted from the page");
        assert_eq!(title, "Test Page");

        assert!(matches!(item.content(), ContentType::Text(_)));
        if let ContentType::Text(text) = item.content() {
            let body = text
                .content
                .as_ref()
                .expect("text content should carry the response body");
            assert!(body.contains("Test content"));
        }

        mock.assert();
    }

    /// Input that is not a URL is rejected before any request is made.
    #[test]
    fn test_fetch_content_invalid_url() {
        let err = HttpContentFetcher::new()
            .fetch_content("not-a-valid-url")
            .expect_err("a malformed URL should be rejected");
        assert!(err.contains("Invalid URL"));
    }

    /// A 404 response is surfaced as an "HTTP error" message.
    #[test]
    fn test_fetch_content_http_error() {
        let mut server = Server::new();
        let mock = server.mock("GET", "/error").with_status(404).create();

        let endpoint = format!("{}/error", server.url());
        let err = HttpContentFetcher::new()
            .fetch_content(&endpoint)
            .expect_err("a 404 response should produce an error");
        assert!(err.contains("HTTP error"));

        mock.assert();
    }

    /// The title helper finds a present title and yields `None` otherwise.
    #[test]
    fn test_extract_title_from_html() {
        let with_title =
            "<html><head><title>My Page Title</title></head><body>Content</body></html>";
        assert_eq!(
            extract_title_from_html(with_title),
            Some("My Page Title".to_string())
        );

        let without_title = "<html><body>Content without title</body></html>";
        assert_eq!(extract_title_from_html(without_title), None);
    }

    /// `Default` construction completes without panicking.
    #[test]
    fn test_default_construction() {
        let _fetcher: HttpContentFetcher = Default::default();
    }
}