use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use crate::html::{Href, Link, UsedLink};
use crate::urls::is_external_link;
pub trait LinkCollector<P>: Send {
fn new() -> Self;
fn ingest(&mut self, link: Link<'_, P>);
fn merge(&mut self, other: Self);
}
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct OwnedUsedLink<P> {
pub href: String,
pub path: Arc<PathBuf>,
pub paragraph: Option<P>,
}
pub struct UsedLinkCollector<P> {
pub used_links: Vec<OwnedUsedLink<P>>,
}
impl<P: Send> LinkCollector<P> for UsedLinkCollector<P> {
fn new() -> Self {
UsedLinkCollector {
used_links: Vec::new(),
}
}
fn ingest(&mut self, link: Link<'_, P>) {
if let Link::Uses(used_link) = link {
self.used_links.push(OwnedUsedLink {
href: used_link.href.0.to_owned(),
path: used_link.path.to_owned(),
paragraph: used_link.paragraph,
});
}
}
fn merge(&mut self, other: Self) {
self.used_links.extend(other.used_links);
}
}
#[derive(Debug)]
enum LinkState<P> {
Defined,
Undefined(Vec<(Arc<PathBuf>, Option<P>)>),
}
impl<P: Copy> LinkState<P> {
fn add_usage(&mut self, link: &UsedLink<P>) {
if let LinkState::Undefined(ref mut links) = self {
links.push((link.path.clone(), link.paragraph));
}
}
fn update(&mut self, other: Self) {
match self {
LinkState::Defined => (),
LinkState::Undefined(links) => match other {
LinkState::Defined => *self = LinkState::Defined,
LinkState::Undefined(links2) => links.extend(links2),
},
}
}
}
pub struct LocalLinksOnly<C> {
pub collector: C,
}
pub fn filter_local_link<P>(link: Link<'_, P>) -> Option<Link<'_, P>> {
if let Link::Uses(ref used_link) = link {
if is_external_link(used_link.href.0.as_bytes()) {
return None;
}
}
Some(link)
}
impl<P, C: LinkCollector<P>> LinkCollector<P> for LocalLinksOnly<C> {
fn new() -> Self {
LocalLinksOnly {
collector: C::new(),
}
}
fn ingest(&mut self, link: Link<'_, P>) {
if let Some(link) = filter_local_link(link) {
self.collector.ingest(link);
}
}
fn merge(&mut self, other: Self) {
self.collector.merge(other.collector);
}
}
pub struct BrokenLinkCollector<P> {
links: HashMap<String, LinkState<P>>,
used_link_count: usize,
}
impl<P: Send + Copy> LinkCollector<P> for BrokenLinkCollector<P> {
fn new() -> Self {
BrokenLinkCollector {
links: HashMap::new(),
used_link_count: 0,
}
}
fn ingest(&mut self, link: Link<'_, P>) {
match link {
Link::Uses(used_link) => {
self.used_link_count += 1;
self.links
.entry(used_link.href.0.to_owned())
.and_modify(|state| state.add_usage(&used_link))
.or_insert_with(|| {
let mut state = LinkState::Undefined(Vec::new());
state.add_usage(&used_link);
state
});
}
Link::Defines(defined_link) => {
self.links
.insert(defined_link.href.0.to_owned(), LinkState::Defined);
}
}
}
fn merge(&mut self, other: Self) {
self.used_link_count += other.used_link_count;
self.links.reserve(other.links.len());
for (href, other_state) in other.links {
if let Some(state) = self.links.get_mut(&href) {
state.update(other_state);
} else {
self.links.insert(href, other_state);
}
}
}
}
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct BrokenLink<P> {
pub hard_404: bool,
pub link: OwnedUsedLink<P>,
}
impl<P: Copy + PartialEq + Ord> BrokenLinkCollector<P> {
pub fn get_broken_links(&self, check_anchors: bool) -> impl Iterator<Item = BrokenLink<P>> {
let mut broken_links = Vec::new();
for (href, state) in self.links.iter() {
if let LinkState::Undefined(links) = state {
let hard_404 = if check_anchors {
!matches!(
self.links.get(Href(href).without_anchor().0),
Some(&LinkState::Defined)
)
} else {
true
};
for (path, paragraph) in links.iter() {
broken_links.push(BrokenLink {
hard_404,
link: OwnedUsedLink {
path: path.clone(),
paragraph: *paragraph,
href: href.clone(),
},
});
}
}
}
broken_links.sort();
broken_links.into_iter()
}
pub fn used_links_count(&self) -> usize {
self.used_link_count
}
}