use anyhow::{anyhow, Context};
use chrono::NaiveDate;
use scraper::{Element, ElementRef, Html, Selector};
use serde::{Deserialize, Serialize};
use url::Url;
use crate::StudIpClient;
use crate::user::{parse_simple_user, User};
use crate::ref_source::ReferenceSource;
/// A single comment attached to a [`NewsArticle`], as scraped from the
/// expanded comment box of a Stud.IP news page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NewsComment {
    /// Stud.IP comment id (the element id with the `newscomment-` prefix stripped).
    pub id: String,
    /// Comment author, parsed from the `h1 > a` link via `parse_simple_user`.
    pub author: User,
    /// Raw inner HTML of the comment's `.formatted-content` element.
    pub html_content: String,
    /// Human-readable relative time exactly as rendered by Stud.IP
    /// (e.g. "vor 2 Tagen") — not parsed into a timestamp.
    pub time_since_string: String,
}
impl PartialEq for NewsComment {
    /// Identity comparison: two comments are the same comment exactly when
    /// their Stud.IP ids match; content/author fields are ignored.
    fn eq(&self, other: &Self) -> bool {
        self.id.eq(&other.id)
    }
}
/// A Stud.IP news article scraped from a news box (see [`parse_news_box`]),
/// optionally populated with its comments via `query_comments`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NewsArticle {
    /// The `id` attribute of the article element on the page.
    pub id: String,
    /// Where this article was scraped from; used to rebuild the page URL
    /// when querying comments.
    pub source: ReferenceSource,
    pub title: String,
    /// Raw inner HTML of the article's `.formatted-content` element
    /// (empty string when the article has no formatted content).
    pub html_content: String,
    pub author: User,
    /// Publication date (parsed from `dd.mm.YYYY` or `dd/mm/YYYY`).
    pub date: NaiveDate,
    /// Visit counter as displayed, thousands separators removed.
    pub visits: usize,
    /// Comment count from the comments indicator; 0 when the indicator is absent.
    pub n_comments: usize,
    /// Whether the article carries the "new" CSS class (unread marker).
    pub is_new: bool,
    /// Filled lazily by `query_comments`; empty right after parsing.
    pub comments: Vec<NewsComment>
}
impl NewsArticle {
    /// Fetches this article's page with the comment box expanded and parses
    /// every comment into `self.comments`, updating `self.n_comments` to match.
    ///
    /// When the response contains no comments, `self.comments` and
    /// `self.n_comments` are left untouched.
    ///
    /// # Errors
    /// Returns an error when the article's source cannot be converted to a
    /// URL, the HTTP request fails, or any expected piece of comment markup
    /// (id attribute, `time` element, author link, formatted content) is
    /// missing from the page.
    pub fn query_comments(&mut self, stud_ip_client: &StudIpClient) -> anyhow::Result<()> {
        let mut url: Url = (&self.source).try_into()?;
        url.set_fragment(Some(&self.id));
        // `comments=1` + `contentbox_open=<id>` make Stud.IP render the
        // comment section for this article server-side.
        let response = stud_ip_client
            .get(url)
            .query(&[("comments", "1"), ("contentbox_open", &self.id)])
            .send()?;
        let html = Html::parse_document(&response.text()?);
        let comment_selector =
            Selector::parse(&format!("article[id=\"{}\"] .comments .comment", self.id))
                .map_err(|_| anyhow!("Failed to parse comments selector"))?;
        // Static selectors — a parse failure here would be a programming error.
        let time_selector = Selector::parse("time").unwrap();
        let author_selector = Selector::parse("h1 > a").unwrap();
        let content_selector = Selector::parse(".formatted-content").unwrap();
        // Bug fix: the accumulator used to be re-created on every loop
        // iteration and `self.comments` was overwritten inside the loop, so
        // only the LAST comment ever survived and `n_comments` was forced to
        // 1. Accumulate across all matches, then commit once after the loop.
        let mut new_comments = vec![];
        for comment_element in html.select(&comment_selector) {
            let comment_id = comment_element
                .attr("id")
                .context("Expected comment id")?
                .replace("newscomment-", "");
            let time_since_string = comment_element
                .select(&time_selector)
                .next()
                .context("Expected comment time")?
                .text()
                .collect::<String>()
                .trim()
                .to_string();
            let author_link = comment_element
                .select(&author_selector)
                .next()
                .context("Expected comment author a tag")?;
            let author = parse_simple_user(author_link)?;
            let content_html = comment_element
                .select(&content_selector)
                .next()
                .context("Expected comment content")?
                .inner_html();
            new_comments.push(NewsComment {
                id: comment_id,
                author,
                html_content: content_html,
                time_since_string,
            });
        }
        if !new_comments.is_empty() {
            self.n_comments = new_comments.len();
            self.comments = new_comments;
        }
        Ok(())
    }
}
impl PartialEq for NewsArticle {
    /// Identity comparison: articles are equal exactly when their Stud.IP
    /// ids match; all other fields are ignored.
    fn eq(&self, other: &Self) -> bool {
        self.id.eq(&other.id)
    }
}
/// Parses all news articles contained in a Stud.IP news box element.
///
/// Comments are NOT fetched here: `comments` is left empty and `n_comments`
/// comes from the comment-count indicator (0 when absent). Use
/// [`NewsArticle::query_comments`] to load them.
///
/// # Errors
/// Fails when an article is missing its title, author, or creation date,
/// when the date matches neither `dd.mm.YYYY` nor `dd/mm/YYYY`, or when the
/// visit counter cannot be parsed as a number.
pub fn parse_news_box(element: ElementRef, reference_source: &ReferenceSource) -> anyhow::Result<Vec<NewsArticle>> {
    // Static selectors — a parse failure here would be a programming error.
    let articles_selector = Selector::parse("article[id].studip").unwrap();
    let title_selector = Selector::parse("header h1").unwrap();
    let news_author_selector = Selector::parse("header .news_user").unwrap();
    let news_creation_date_selector = Selector::parse("header .news_date").unwrap();
    let news_visits_selector = Selector::parse("header .news_visits").unwrap();
    let news_n_comments_selector = Selector::parse("header .news_comments_indicator").unwrap();
    let article_section_selector = Selector::parse("section > article").unwrap();
    let content_selector = Selector::parse(".formatted-content").unwrap();
    let mut news_articles = vec![];
    for article_elem in element.select(&articles_selector) {
        // `article[id]` in the selector guarantees the attribute exists.
        let article_id = article_elem
            .attr("id")
            .expect("selector `article[id]` guarantees an id attribute")
            .to_string();
        let title = article_elem
            .select(&title_selector)
            .next()
            .context("Expected news title")?
            .text()
            .collect::<String>()
            .trim()
            .to_string();
        let author_elem = article_elem
            .select(&news_author_selector)
            .next()
            .context("Expected news author")?;
        let author = parse_simple_user(author_elem).context("Could not parse author user")?;
        let news_date_string = article_elem
            .select(&news_creation_date_selector)
            .next()
            .context("Expected news creation date")?
            .text()
            .collect::<String>()
            .trim()
            .to_string();
        // Stud.IP renders dates locale-dependently; accept both separators.
        let news_date = NaiveDate::parse_from_str(&news_date_string, "%d.%m.%Y")
            .or_else(|_| NaiveDate::parse_from_str(&news_date_string, "%d/%m/%Y"))
            .context("Could not parse news date")?;
        // Thousands separator ('.') is stripped before parsing the counter.
        let visits: usize = article_elem
            .select(&news_visits_selector)
            .next()
            .context("Expected news visits")?
            .text()
            .collect::<String>()
            .trim()
            .replace('.', "")
            .parse()?;
        // The indicator is absent when an article has no comments — default to 0.
        let n_comments: usize = article_elem
            .select(&news_n_comments_selector)
            .next()
            .and_then(|e| e.text().collect::<String>().trim().replace('.', "").parse().ok())
            .unwrap_or(0);
        // Unread articles carry the "new" CSS class.
        let is_new = article_elem.has_class(&"new".into(), scraper::CaseSensitivity::CaseSensitive);
        let article_section_elem = article_elem
            .select(&article_section_selector)
            .next()
            .context("Expected article section")?;
        // Articles may legitimately have no formatted content; avoid the
        // eager allocation of `unwrap_or(String::new())`.
        let content_html = article_section_elem
            .select(&content_selector)
            .next()
            .map(|e| e.inner_html())
            .unwrap_or_default();
        news_articles.push(NewsArticle {
            id: article_id,
            source: reference_source.clone(),
            title,
            html_content: content_html,
            author,
            date: news_date,
            visits,
            n_comments,
            is_new,
            comments: vec![],
        });
    }
    Ok(news_articles)
}