1use cli_table::{format::Justify, Cell, Style, Table};
14use regex::Regex;
15use select::document::Document;
16use select::predicate::{Attr, Class, Name};
17use serde::{Deserialize, Serialize};
18use std::error::Error;
19use std::fmt;
20use std::path::Path;
21use tokio::fs;
22
23pub mod args;
24
25fn select_html(res: &str) -> Vec<Vec<String>> {
32 let document = Document::from(res);
33 let articles = document
34 .find(Name("article"))
35 .into_selection()
36 .filter(Class("clearfix"));
37
38 let mut data: Vec<String> = vec![];
39
40 for node in articles.into_iter() {
42 let name = node.find(Name("h2")).next().unwrap().text();
44 let name_vec: Vec<&str> = name.split('[').collect();
45
46 let name_without_region: String = name_vec
48 .first()
49 .map(|v: &&str| v.trim_end())
50 .unwrap_or("")
51 .to_owned();
52 data.push(name_without_region);
53
54 let region: String = name_vec
56 .first()
57 .map(|v: &&str| v.trim_end_matches(|c| c == ' ' || c == ']' || c == '\n'))
58 .unwrap_or("")
59 .to_owned();
60 data.push(region);
61
62 let contact = node.find(Name("p")).next().unwrap().text();
64 let recognition_vec: Vec<&str> = contact
65 .split('\n')
66 .filter(|v| v.contains("Recognition"))
67 .map(|v| v.trim())
68 .collect();
69 let status: String = recognition_vec
70 .first()
71 .and_then(|v: &&str| v.get(20..))
72 .unwrap_or("")
73 .to_owned();
74 data.push(status);
75
76 let info = node.find(Attr("class", "right")).next().unwrap().text();
78 let mut contact_vec: Vec<&str> = info.split('\n').map(|v| v.trim()).collect();
79 let website = contact_vec.split_off(2);
80
81 let address = contact_vec.join(", ");
83 let address_regex = Regex::new(
84 r"(?P<addr>[\w|\W]+),\s(?P<city>[\w|\W]+),\s(?P<state>[A-Z]{2})(?P<zip>\d+|\d+\-\d+)",
85 )
86 .unwrap();
87 let next_addr = address_regex
88 .replace_all(&address, "$addr $city, $state $zip")
89 .to_string();
90 data.push(next_addr);
91
92 let site = website
94 .join(" ")
95 .get(8..)
96 .map(|v| v.trim())
97 .unwrap_or("")
98 .to_owned();
99 data.push(site);
100 }
101
102 let chunks = data.chunks(5).map(|c| c.into()).collect();
103
104 chunks
105}
106
/// Error returned by `scrape_tribal_dir` when ./tribes.csv already exists
/// and the `--force` flag was not supplied.
#[derive(Debug)]
struct FileExistsError;

impl Error for FileExistsError {}

impl fmt::Display for FileExistsError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Typo fix: "already exits" -> "already exists".
        write!(f, "The file ./tribes.csv already exists. If you want to overwrite this file use `tgd update --force`")
    }
}
117
118pub async fn scrape_tribal_dir(force_flag: bool) -> Result<(), Box<dyn Error>> {
121 let file_exists = Path::new("./tribes.csv").exists();
123
124 if file_exists && !force_flag {
125 return Err(Box::new(FileExistsError {}));
126 }
127
128 fs::remove_file("./tribes.csv").await?;
130
131 println!("💻 Requesting tribal directory from https://naci.org/tribal-directory");
132 let mut wtr = csv::WriterBuilder::new()
133 .flexible(true)
134 .from_path("tribes.csv")?;
135
136 wtr.write_record(["Nation", "Region", "Recognition", "Address", "Website"])?;
138
139 println!("💿 Parsing HTML");
140 for number in 1..=26 {
141 let res = reqwest::get(
143 "https://www.ncai.org/tribal-directory?page=".to_owned() + &number.to_string(),
144 )
145 .await?
146 .text()
147 .await?;
148
149 let data = select_html(&res);
151
152 for d in data.iter() {
154 wtr.write_record(d)?
155 }
156 }
157
158 wtr.flush()?;
159 println!("💾 Saved file to ./tribes.csv");
160 Ok(())
161}
162
/// One row of ./tribes.csv; serde aliases map the capitalized CSV headers
/// onto the snake_case fields.
#[derive(Serialize, Deserialize, Debug)]
struct Nation {
    // Nation name without the bracketed region suffix.
    #[serde(alias = "Nation")]
    nation: String,
    // Region extracted from the "[...]" part of the directory heading.
    #[serde(alias = "Region")]
    region: String,
    // Recognition status, e.g. "Federal" (see the check in `stats`).
    #[serde(alias = "Recognition")]
    recognition: String,
    // Mailing address; `filter_govts` matches state codes as substrings here.
    #[serde(alias = "Address")]
    address: String,
    // Website URL; may be empty when the directory lists none.
    #[serde(alias = "Website")]
    website: String,
}
176
177pub fn list_govts() {
179 let mut rdr = csv::Reader::from_path("./tribes.csv").expect("File tribes.csv does not exist");
180 let mut table = Vec::new();
181 for n in rdr.deserialize() {
182 let nation: Nation = n.unwrap();
183 let row = vec![nation.nation.cell().justify(Justify::Left)];
184 table.push(row);
185 }
186
187 let t = table
188 .table()
189 .title(vec!["Name".cell().bold(true)])
190 .display()
191 .unwrap();
192
193 println!("{}", t);
194}
195
196pub fn filter_govts(
198 filter: &Option<args::WebsiteFilter>,
199 state: &Option<String>,
200 name: &Option<String>,
201) {
202 let mut rdr = csv::Reader::from_path("./tribes.csv")
203 .expect("File tribes.csv does not exist. Run `tgd update`");
204 let mut data = Vec::new();
205
206 for n in rdr.deserialize() {
207 let nation: Nation = n.unwrap();
208 data.push(nation);
209 }
210
211 if let Some(n) = name {
212 data = data
213 .into_iter()
214 .filter(|nation| nation.nation.contains(n))
215 .collect();
216 }
217
218 if let Some(s) = state {
219 data = data.into_iter().filter(|n| n.address.contains(s)).collect();
220 }
221
222 if let Some(f) = filter {
223 match f {
224 args::WebsiteFilter::DotGov => {
225 data = data
226 .into_iter()
227 .filter(|n| n.website.ends_with(".gov"))
228 .collect();
229 }
230 args::WebsiteFilter::DotCom => {
231 data = data
232 .into_iter()
233 .filter(|n| n.website.ends_with(".com"))
234 .collect();
235 }
236 args::WebsiteFilter::DotOrg => {
237 data = data
238 .into_iter()
239 .filter(|n| n.website.ends_with(".org"))
240 .collect();
241 }
242 args::WebsiteFilter::DotNet => {
243 data = data
244 .into_iter()
245 .filter(|n| n.website.ends_with(".net"))
246 .collect();
247 }
248 args::WebsiteFilter::Http => {
249 data = data
250 .into_iter()
251 .filter(|n| n.website.starts_with("http:"))
252 .collect();
253 }
254 args::WebsiteFilter::Https => {
255 data = data
256 .into_iter()
257 .filter(|n| n.website.starts_with("https:"))
258 .collect();
259 }
260 }
261 }
262
263 let mut table = Vec::new();
264
265 for nation in &data {
266 let row = vec![
267 nation.nation.as_str().cell().justify(Justify::Left),
268 nation.website.as_str().cell().justify(Justify::Left),
269 ];
270 table.push(row);
271 }
272
273 let t = table
274 .table()
275 .title(vec!["Name".cell().bold(true), "Website".cell().bold(true)])
276 .display()
277 .unwrap();
278
279 println!("{}", t);
280}
281
282pub fn stats(filter: &Option<args::WebsiteFilter>) {
284 let mut rdr = csv::Reader::from_path("./tribes.csv")
285 .expect("File tribes.csv does not exist. Run ``tgd update`");
286 let mut data = Vec::new();
287
288 let mut number_of_nations = 0;
289 let mut number_of_websites = 0;
290 let mut result = "".to_owned();
291 let mut percent_websites = "".to_owned();
292 let mut percent_nations = "".to_owned();
293
294 if let Some(f) = filter {
295 for n in rdr.deserialize() {
296 let nation: Nation = n.unwrap();
297 if nation.recognition == "Federal" {
298 number_of_nations += 1;
299
300 if !nation.website.is_empty() {
301 number_of_websites += 1;
302 }
303 }
304 data.push(nation);
305 }
306
307 match f {
308 args::WebsiteFilter::DotGov => {
309 data = data
310 .into_iter()
311 .filter(|n| n.website.ends_with(".gov"))
312 .collect();
313
314 result.push_str("sites with dot gov domains: ");
315 result.push_str(data.len().to_string().as_str());
316
317 let pw = data.len() * 100 / number_of_websites;
318 let pn = data.len() * 100 / number_of_nations;
319
320 percent_websites.push_str(pw.to_string().as_str());
321 percent_websites.push_str("%");
322 percent_nations.push_str(pn.to_string().as_str());
323 percent_nations.push_str("%");
324 }
325 args::WebsiteFilter::DotCom => {
326 data = data
327 .into_iter()
328 .filter(|n| n.website.ends_with(".com"))
329 .collect();
330
331 result.push_str("sites with dot com domains: ");
332 result.push_str(data.len().to_string().as_str());
333
334 let pw = data.len() * 100 / number_of_websites;
335 let pn = data.len() * 100 / number_of_nations;
336
337 percent_websites.push_str(pw.to_string().as_str());
338 percent_nations.push_str(pn.to_string().as_str());
339 }
340 args::WebsiteFilter::DotOrg => {
341 data = data
342 .into_iter()
343 .filter(|n| n.website.ends_with(".org"))
344 .collect();
345
346 result.push_str("sites with dot org domains: ");
347 result.push_str(data.len().to_string().as_str());
348
349 let pw = data.len() * 100 / number_of_websites;
350 let pn = data.len() * 100 / number_of_nations;
351
352 percent_websites.push_str(pw.to_string().as_str());
353 percent_websites.push_str("%");
354 percent_nations.push_str(pn.to_string().as_str());
355 percent_nations.push_str("%");
356 }
357 args::WebsiteFilter::DotNet => {
358 data = data
359 .into_iter()
360 .filter(|n| n.website.ends_with(".net"))
361 .collect();
362
363 result.push_str("sites with dot net domains: ");
364 result.push_str(data.len().to_string().as_str());
365
366 let pw = data.len() * 100 / number_of_websites;
367 let pn = data.len() * 100 / number_of_nations;
368
369 percent_websites.push_str(pw.to_string().as_str());
370 percent_websites.push_str("%");
371 percent_nations.push_str(pn.to_string().as_str());
372 percent_nations.push_str("%");
373 }
374 args::WebsiteFilter::Http => {
375 data = data
376 .into_iter()
377 .filter(|n| n.website.starts_with("http:"))
378 .collect();
379
380 result.push_str("sites with http: ");
381 result.push_str(data.len().to_string().as_str());
382
383 let pw = data.len() * 100 / number_of_websites;
384 let pn = data.len() * 100 / number_of_nations;
385
386 percent_websites.push_str(pw.to_string().as_str());
387 percent_websites.push_str("%");
388 percent_nations.push_str(pn.to_string().as_str());
389 percent_nations.push_str("%");
390 }
391 args::WebsiteFilter::Https => {
392 data = data
393 .into_iter()
394 .filter(|n| n.website.starts_with("https:"))
395 .collect();
396
397 result.push_str("sites with https: ");
398 result.push_str(data.len().to_string().as_str());
399
400 let pw = data.len() * 100 / number_of_websites;
401 let pn = data.len() * 100 / number_of_nations;
402
403 percent_websites.push_str(pw.to_string().as_str());
404 percent_websites.push_str("%");
405 percent_nations.push_str(pn.to_string().as_str());
406 percent_nations.push_str("%");
407 }
408 }
409 }
410
411 println!("\n");
412 println!("{result}");
413 println!("percent of all websites: {percent_websites}");
414 println!("percent of all nations: {percent_nations}");
415}