use std::collections::HashMap;
use std::sync::{Arc, Mutex, MutexGuard};
/// A shareable URL-status database.
///
/// The map from URL to status lives behind an `Arc<Mutex<..>>`, so cloning a
/// `UrlDb` is cheap and yields another handle to the *same* underlying map:
/// changes made through one handle are visible through every clone.
///
/// `Default` produces an empty database.
#[derive(Clone, Debug, Default)]
pub struct UrlDb(Arc<Mutex<HashMap<String, Status>>>);
impl UrlDb {
pub fn new() -> Self {
UrlDb(Arc::new(Mutex::new(HashMap::new())))
}
pub fn visited_urls_iter(&self) -> impl Iterator<Item = String> {
let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.clone()
.into_iter()
.filter(|(_k, v)| *v == Status::Visited)
.map(|(k, _v)| k)
}
pub fn staged_urls_iter(&self) -> impl Iterator<Item = String> {
let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.clone()
.into_iter()
.filter(|(_k, v)| *v == Status::Staged)
.map(|(k, _v)| k)
}
pub fn unvisited_urls_iter(&self) -> impl Iterator<Item = String> {
let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.clone()
.into_iter()
.filter(|(_k, v)| *v == Status::Unvisited)
.map(|(k, _v)| k)
}
pub fn skipped_urls_iter(&self) -> impl Iterator<Item = String> {
let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.clone()
.into_iter()
.filter(|(_k, v)| *v == Status::Skip)
.map(|(k, _v)| k)
}
pub fn errored_urls_iter(&self) -> impl Iterator<Item = String> {
let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.clone()
.into_iter()
.filter(|(_k, v)| *v == Status::Error)
.map(|(k, _v)| k)
}
pub fn num_visited_urls(&self) -> usize {
self.visited_urls_iter().count()
}
pub fn num_staged_urls(&self) -> usize {
self.staged_urls_iter().count()
}
pub fn num_unvisited_urls(&self) -> usize {
self.unvisited_urls_iter().count()
}
pub fn num_skipped_urls(&self) -> usize {
self.skipped_urls_iter().count()
}
pub fn num_errored_urls(&self) -> usize {
self.errored_urls_iter().count()
}
pub fn mark_visited(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.insert(url.to_owned(), Status::Visited);
}
pub fn mark_staged(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.insert(url.to_owned(), Status::Staged);
}
pub fn mark_unvisited(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.insert(url.to_owned(), Status::Unvisited);
}
pub fn mark_skipped(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.insert(url.to_owned(), Status::Skip);
}
pub fn mark_errored(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.insert(url.to_owned(), Status::Error);
}
pub fn cond_mark_visited(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.entry(url.clone()).or_insert(Status::Visited);
}
pub fn cond_mark_staged(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.entry(url.clone()).or_insert(Status::Staged);
}
pub fn cond_mark_unvisited(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.entry(url.clone()).or_insert(Status::Unvisited);
}
pub fn cond_mark_skipped(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.entry(url.clone()).or_insert(Status::Skip);
}
pub fn cond_mark_errored(&mut self, url: String) -> () {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
hm.entry(url.clone()).or_insert(Status::Error);
}
pub fn stage_unvisited_urls(&mut self) {
let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
Ok(guard) => guard,
Err(poisoned) => poisoned.into_inner(),
};
for (k, _v) in hm
.clone()
.into_iter()
.filter(|(_k, v)| *v == Status::Unvisited)
{
hm.insert(k, Status::Staged);
}
}
}
/// Status recorded for a URL in the database.
///
/// Equality is derived, so two statuses are equal exactly when they are the
/// same variant; a newly added variant is covered automatically.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Status {
    /// Set by `mark_visited` / `cond_mark_visited`.
    Visited,
    /// Set by `mark_staged` / `cond_mark_staged`, and by
    /// `stage_unvisited_urls` for entries that were `Unvisited`.
    Staged,
    /// Set by `mark_unvisited` / `cond_mark_unvisited`.
    Unvisited,
    /// Set by `mark_skipped` / `cond_mark_skipped`.
    Skip,
    /// Set by `mark_errored` / `cond_mark_errored`.
    Error,
}