use colored::Colorize;
use std::fmt;
use std::sync::Arc;
use anyhow::Result;
use headless_chrome::LaunchOptions;
use std::{collections::HashSet, sync::Mutex};
use scraper::Html;
use url::Url;
use crate::cli::Content;
use crate::extract;
use crate::node;
/// A headless Chrome instance paired with the tab it is driving.
pub struct Browser {
    /// Handle to the launched Chrome instance. It is never read after
    /// construction, hence the `dead_code` allowance.
    // NOTE(review): presumably retained so the browser outlives `tab` — confirm
    // against the headless_chrome drop semantics.
    #[allow(dead_code)]
    browser: headless_chrome::Browser,
    /// The tab opened by [`Browser::new_navigate`]; page content is read from it.
    pub tab: Arc<headless_chrome::Tab>,
}
impl Browser {
    /// Launches a headless Chrome instance, opens a new tab and navigates it to `url`.
    ///
    /// The call returns only after `wait_until_navigated` has succeeded on the tab.
    ///
    /// # Errors
    ///
    /// Returns [`BrowseError::Browser`] if the launch options cannot be built, the
    /// browser cannot be started, the tab cannot be opened, or navigation fails.
    pub fn new_navigate(url: &Url) -> Result<Self, BrowseError> {
        // The builder error is not an `anyhow::Error`, so it is mapped by hand;
        // every other failure below goes through `From<anyhow::Error>`.
        let options = LaunchOptions::default_builder()
            .devtools(false)
            .build()
            .map_err(|e| BrowseError::Browser(e.to_string()))?;
        let browser = headless_chrome::Browser::new(options)?;
        let tab = browser.new_tab()?;
        tab.navigate_to(url.as_str())?;
        tab.wait_until_navigated()?;
        Ok(Self { browser, tab })
    }

    /// Parses the tab's current HTML and runs the extractors selected by `contents`
    /// against `node`.
    ///
    /// Links are always extracted (using the node's URL) and returned, regardless of
    /// `contents`; they are only stored on the node for [`Content::Links`] and
    /// [`Content::All`]. Entries of `contents` are processed in order, one after
    /// another, so repeated or overlapping entries run their extractors again.
    ///
    /// Consumes `self`, so the browser handle and tab are dropped when the call ends.
    ///
    /// # Panics
    ///
    /// Panics if the page content cannot be read from the tab, or if the `node`
    /// mutex is poisoned.
    pub async fn parse_document(
        self,
        contents: &[Content],
        node: &Arc<Mutex<node::Node>>,
    ) -> HashSet<Url> {
        let response = self
            .tab
            .get_content()
            .expect("failed to read page content from the browser tab");
        let document = Html::parse_document(&response);
        // The lock guard is a temporary that is released at the end of this
        // statement, before any extractor receives `node`.
        let links = extract::extract_links(&node.lock().unwrap().url, &document);

        for content in contents {
            match content {
                Content::Texts => {
                    extract::extract_texts(node, &document);
                }
                Content::Comments => {
                    extract::extract_comments(node, &document);
                }
                Content::Links => Self::store_links(node, &links),
                Content::Images => {
                    extract::extract_images(node, &document);
                }
                Content::Inputs => {
                    extract::extract_input(node, &document);
                }
                Content::All => {
                    extract::extract_texts(node, &document);
                    extract::extract_comments(node, &document);
                    Self::store_links(node, &links);
                    extract::extract_images(node, &document);
                    extract::extract_input(node, &document);
                }
            }
        }

        links
    }

    /// Stores a copy of `links` on the node, replacing any previously stored links.
    fn store_links(node: &Arc<Mutex<node::Node>>, links: &HashSet<Url>) {
        // Clone element-by-element straight into the target collection instead of
        // cloning the whole set first and then consuming the clone.
        node.lock().unwrap().links = Some(links.iter().cloned().collect());
    }
}
/// Error type returned by the browser wrapper in this module.
pub enum BrowseError {
    /// A failure reported while launching or driving the browser, carried as a
    /// human-readable message.
    Browser(String),
}
impl BrowseError {
fn print(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
BrowseError::Browser(e) => write!(f, "{}: {}", "Browser error".red(), e),
}
}
}
impl fmt::Display for BrowseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.print(f)
}
}
impl fmt::Debug for BrowseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.print(f)
}
}
// Marks `BrowseError` as a standard error type. No methods are overridden, so the
// trait's defaults apply (in particular, no underlying `source` is exposed).
impl std::error::Error for BrowseError {}
impl From<anyhow::Error> for BrowseError {
    /// Flattens an `anyhow::Error` into [`BrowseError::Browser`], which lets `?`
    /// be used on `anyhow::Result` values inside functions returning `BrowseError`.
    fn from(value: anyhow::Error) -> Self {
        // The alternate form `{:#}` renders the full context chain
        // ("outer: inner: root cause"). Plain `to_string()` keeps only the
        // outermost message and silently drops the underlying causes; for an
        // error without a chain both forms produce the same text.
        BrowseError::Browser(format!("{:#}", value))
    }
}