1use std::collections::HashMap;
2use std::sync::{Arc, Mutex, MutexGuard};
3
4#[derive(Debug)]
6pub struct UrlDb(Arc<Mutex<HashMap<String, Status>>>);
7
8impl Clone for UrlDb {
9 fn clone(&self) -> Self {
11 UrlDb(Arc::clone(&self.0))
12 }
13}
14
15impl UrlDb {
16 pub fn new() -> Self {
18 UrlDb(Arc::new(Mutex::new(HashMap::new())))
19 }
20
21 pub fn visited_urls_iter(&self) -> impl Iterator<Item = String> {
23 let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
24 Ok(guard) => guard,
25 Err(poisoned) => poisoned.into_inner(),
26 };
27 hm.clone()
28 .into_iter()
29 .filter(|(_k, v)| *v == Status::Visited)
30 .map(|(k, _v)| k)
31 }
32
33 pub fn staged_urls_iter(&self) -> impl Iterator<Item = String> {
35 let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
36 Ok(guard) => guard,
37 Err(poisoned) => poisoned.into_inner(),
38 };
39 hm.clone()
40 .into_iter()
41 .filter(|(_k, v)| *v == Status::Staged)
42 .map(|(k, _v)| k)
43 }
44
45 pub fn unvisited_urls_iter(&self) -> impl Iterator<Item = String> {
47 let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
48 Ok(guard) => guard,
49 Err(poisoned) => poisoned.into_inner(),
50 };
51 hm.clone()
52 .into_iter()
53 .filter(|(_k, v)| *v == Status::Unvisited)
54 .map(|(k, _v)| k)
55 }
56
57 pub fn skipped_urls_iter(&self) -> impl Iterator<Item = String> {
59 let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
60 Ok(guard) => guard,
61 Err(poisoned) => poisoned.into_inner(),
62 };
63 hm.clone()
64 .into_iter()
65 .filter(|(_k, v)| *v == Status::Skip)
66 .map(|(k, _v)| k)
67 }
68
69 pub fn errored_urls_iter(&self) -> impl Iterator<Item = String> {
72 let hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
73 Ok(guard) => guard,
74 Err(poisoned) => poisoned.into_inner(),
75 };
76 hm.clone()
77 .into_iter()
78 .filter(|(_k, v)| *v == Status::Error)
79 .map(|(k, _v)| k)
80 }
81
82 pub fn num_visited_urls(&self) -> usize {
84 self.visited_urls_iter().count()
85 }
86
87 pub fn num_staged_urls(&self) -> usize {
89 self.staged_urls_iter().count()
90 }
91
92 pub fn num_unvisited_urls(&self) -> usize {
94 self.unvisited_urls_iter().count()
95 }
96
97 pub fn num_skipped_urls(&self) -> usize {
99 self.skipped_urls_iter().count()
100 }
101
102 pub fn num_errored_urls(&self) -> usize {
104 self.errored_urls_iter().count()
105 }
106
107 pub fn mark_visited(&mut self, url: &str) -> () {
109 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
110 Ok(guard) => guard,
111 Err(poisoned) => poisoned.into_inner(),
112 };
113 hm.insert(url.to_owned(), Status::Visited);
114 }
115
116 pub fn mark_staged(&mut self, url: &str) -> () {
118 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
119 Ok(guard) => guard,
120 Err(poisoned) => poisoned.into_inner(),
121 };
122 hm.insert(url.to_owned(), Status::Staged);
123 }
124
125 pub fn mark_unvisited(&mut self, url: &str) -> () {
127 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
128 Ok(guard) => guard,
129 Err(poisoned) => poisoned.into_inner(),
130 };
131 hm.insert(url.to_owned(), Status::Unvisited);
132 }
133
134 pub fn mark_skipped(&mut self, url: &str) -> () {
136 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
137 Ok(guard) => guard,
138 Err(poisoned) => poisoned.into_inner(),
139 };
140 hm.insert(url.to_owned(), Status::Skip);
141 }
142
143 pub fn mark_errored(&mut self, url: &str) -> () {
145 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
146 Ok(guard) => guard,
147 Err(poisoned) => poisoned.into_inner(),
148 };
149 hm.insert(url.to_owned(), Status::Error);
150 }
151
152 pub fn cond_mark_visited(&mut self, url: &str) -> () {
154 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
155 Ok(guard) => guard,
156 Err(poisoned) => poisoned.into_inner(),
157 };
158 hm.entry(url.to_owned()).or_insert(Status::Visited);
159 }
160
161 pub fn cond_mark_staged(&mut self, url: &str) -> () {
163 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
164 Ok(guard) => guard,
165 Err(poisoned) => poisoned.into_inner(),
166 };
167 hm.entry(url.to_owned()).or_insert(Status::Staged);
168 }
169
170 pub fn cond_mark_unvisited(&mut self, url: &str) -> () {
172 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
173 Ok(guard) => guard,
174 Err(poisoned) => poisoned.into_inner(),
175 };
176 hm.entry(url.to_owned()).or_insert(Status::Unvisited);
177 }
178
179 pub fn cond_mark_skipped(&mut self, url: &str) -> () {
181 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
182 Ok(guard) => guard,
183 Err(poisoned) => poisoned.into_inner(),
184 };
185 hm.entry(url.to_owned()).or_insert(Status::Skip);
186 }
187
188 pub fn cond_mark_errored(&mut self, url: &str) -> () {
190 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
191 Ok(guard) => guard,
192 Err(poisoned) => poisoned.into_inner(),
193 };
194 hm.entry(url.to_owned()).or_insert(Status::Error);
195 }
196
197 pub fn stage_unvisited_urls(&mut self) {
199 let mut hm: MutexGuard<HashMap<String, Status>> = match self.0.lock() {
200 Ok(guard) => guard,
201 Err(poisoned) => poisoned.into_inner(),
202 };
203 for (k, _v) in hm
204 .clone()
205 .into_iter()
206 .filter(|(_k, v)| *v == Status::Unvisited)
207 {
208 hm.insert(k, Status::Staged);
209 }
210 }
211}
212
/// Status recorded for a URL key in `UrlDb`.
///
/// Two statuses are equal exactly when they are the same variant.
// `PartialEq` is derived rather than hand-written: a manual `match` with a
// `_ => false` arm would silently make a newly added variant unequal to
// itself until someone remembered to extend the match.
#[derive(Copy, Debug, Clone, PartialEq)]
enum Status {
    /// Assigned by `UrlDb::mark_visited` / `UrlDb::cond_mark_visited`.
    Visited,
    /// Assigned by `UrlDb::mark_staged` / `UrlDb::cond_mark_staged`, and by
    /// `UrlDb::stage_unvisited_urls` to every previously `Unvisited` URL.
    Staged,
    /// Assigned by `UrlDb::mark_unvisited` / `UrlDb::cond_mark_unvisited`.
    Unvisited,
    /// Assigned by `UrlDb::mark_skipped` / `UrlDb::cond_mark_skipped`.
    Skip,
    /// Assigned by `UrlDb::mark_errored` / `UrlDb::cond_mark_errored`.
    Error,
}