tgd/
lib.rs

1//! tgd - tribal government directory
2//!
3//! A command line utility (cli) to query a directory of tribal governments
4//!
5//! Examples:
6//! ```console
7//! $ tgd list # lists the names of all tribal governments
8//! $ tgd list --name Muscogee # lists tribal governments with given name
9//! $ tgd list --websites https # lists tribal governments who have https websites
10//! $ tgd --help # more details about how it works
11//! ```
12
13use cli_table::{format::Justify, Cell, Style, Table};
14use regex::Regex;
15use select::document::Document;
16use select::predicate::{Attr, Class, Name};
17use serde::{Deserialize, Serialize};
18use std::error::Error;
19use std::fmt;
20use std::path::Path;
21use tokio::fs;
22
23pub mod args;
24
25// Scrape tribal information based on this HTML structure:
26//   <article class="clearfix"
27//     <h2> {name} <span> {specifier} </span> </h2>
28//     <p>  {contact} ... Recognition Status: {status: federal/state} </p>
29//     <p class="right"> {address} ... Website: {website} </p>
30//   </article>
31fn select_html(res: &str) -> Vec<Vec<String>> {
32    let document = Document::from(res);
33    let articles = document
34        .find(Name("article"))
35        .into_selection()
36        .filter(Class("clearfix"));
37
38    let mut data: Vec<String> = vec![];
39
40    /* Iterate on article tags to pluck out the gov information */
41    for node in articles.into_iter() {
42        /* Get nation with region */
43        let name = node.find(Name("h2")).next().unwrap().text();
44        let name_vec: Vec<&str> = name.split('[').collect();
45
46        /* Get nation without region */
47        let name_without_region: String = name_vec
48            .first()
49            .map(|v: &&str| v.trim_end())
50            .unwrap_or("")
51            .to_owned();
52        data.push(name_without_region);
53
54        /* Get region */
55        let region: String = name_vec
56            .first()
57            .map(|v: &&str| v.trim_end_matches(|c| c == ' ' || c == ']' || c == '\n'))
58            .unwrap_or("")
59            .to_owned();
60        data.push(region);
61
62        /* Get recognition */
63        let contact = node.find(Name("p")).next().unwrap().text();
64        let recognition_vec: Vec<&str> = contact
65            .split('\n')
66            .filter(|v| v.contains("Recognition"))
67            .map(|v| v.trim())
68            .collect();
69        let status: String = recognition_vec
70            .first()
71            .and_then(|v: &&str| v.get(20..))
72            .unwrap_or("")
73            .to_owned();
74        data.push(status);
75
76        /* Get website */
77        let info = node.find(Attr("class", "right")).next().unwrap().text();
78        let mut contact_vec: Vec<&str> = info.split('\n').map(|v| v.trim()).collect();
79        let website = contact_vec.split_off(2);
80
81        /* Get address */
82        let address = contact_vec.join(", ");
83        let address_regex = Regex::new(
84            r"(?P<addr>[\w|\W]+),\s(?P<city>[\w|\W]+),\s(?P<state>[A-Z]{2})(?P<zip>\d+|\d+\-\d+)",
85        )
86        .unwrap();
87        let next_addr = address_regex
88            .replace_all(&address, "$addr $city, $state $zip")
89            .to_string();
90        data.push(next_addr);
91
92        /* Get website */
93        let site = website
94            .join(" ")
95            .get(8..)
96            .map(|v| v.trim())
97            .unwrap_or("")
98            .to_owned();
99        data.push(site);
100    }
101
102    let chunks = data.chunks(5).map(|c| c.into()).collect();
103
104    chunks
105}
106
/// Error returned when `./tribes.csv` already exists and the caller did not
/// ask for it to be overwritten.
#[derive(Debug)]
struct FileExistsError;

impl Error for FileExistsError {}

impl fmt::Display for FileExistsError {
    /// Writes the user-facing explanation, including how to force an overwrite.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "The file ./tribes.csv already exists. If you want to overwrite this file use `tgd update --force`")
    }
}
117
118/// Go to the page for the tribal directory, <https://www.ncai.org/tribal-directory?page=1>,
119/// and output the data into a CSV file
120pub async fn scrape_tribal_dir(force_flag: bool) -> Result<(), Box<dyn Error>> {
121    // Check if tribes.csv exists, if it does and user uses --force flag, remove it and create a new file, otherwise exit saying that the file already exists
122    let file_exists = Path::new("./tribes.csv").exists();
123
124    if file_exists && !force_flag {
125        return Err(Box::new(FileExistsError {}));
126    }
127
128    // Remove existing csv file
129    fs::remove_file("./tribes.csv").await?;
130
131    println!("💻 Requesting tribal directory from https://naci.org/tribal-directory");
132    let mut wtr = csv::WriterBuilder::new()
133        .flexible(true)
134        .from_path("tribes.csv")?;
135
136    /* Create columns for csv */
137    wtr.write_record(["Nation", "Region", "Recognition", "Address", "Website"])?;
138
139    println!("💿 Parsing HTML");
140    for number in 1..=26 {
141        /* Fetch html from ncai tribal directory from pages 1 - 26 (# of letters in alphabet)  */
142        let res = reqwest::get(
143            "https://www.ncai.org/tribal-directory?page=".to_owned() + &number.to_string(),
144        )
145        .await?
146        .text()
147        .await?;
148
149        /* Get data for each csv column */
150        let data = select_html(&res);
151
152        /* Write data to tribes.csv */
153        for d in data.iter() {
154            wtr.write_record(d)?
155        }
156    }
157
158    wtr.flush()?;
159    println!("💾 Saved file to ./tribes.csv");
160    Ok(())
161}
162
163#[derive(Serialize, Deserialize, Debug)]
164struct Nation {
165    #[serde(alias = "Nation")]
166    nation: String,
167    #[serde(alias = "Region")]
168    region: String,
169    #[serde(alias = "Recognition")]
170    recognition: String,
171    #[serde(alias = "Address")]
172    address: String,
173    #[serde(alias = "Website")]
174    website: String,
175}
176
177/// Take the CSV data and output all the governments
178pub fn list_govts() {
179    let mut rdr = csv::Reader::from_path("./tribes.csv").expect("File tribes.csv does not exist");
180    let mut table = Vec::new();
181    for n in rdr.deserialize() {
182        let nation: Nation = n.unwrap();
183        let row = vec![nation.nation.cell().justify(Justify::Left)];
184        table.push(row);
185    }
186
187    let t = table
188        .table()
189        .title(vec!["Name".cell().bold(true)])
190        .display()
191        .unwrap();
192
193    println!("{}", t);
194}
195
196/// Filter the governments based on website filter, state, and name
197pub fn filter_govts(
198    filter: &Option<args::WebsiteFilter>,
199    state: &Option<String>,
200    name: &Option<String>,
201) {
202    let mut rdr = csv::Reader::from_path("./tribes.csv")
203        .expect("File tribes.csv does not exist. Run `tgd update`");
204    let mut data = Vec::new();
205
206    for n in rdr.deserialize() {
207        let nation: Nation = n.unwrap();
208        data.push(nation);
209    }
210
211    if let Some(n) = name {
212        data = data
213            .into_iter()
214            .filter(|nation| nation.nation.contains(n))
215            .collect();
216    }
217
218    if let Some(s) = state {
219        data = data.into_iter().filter(|n| n.address.contains(s)).collect();
220    }
221
222    if let Some(f) = filter {
223        match f {
224            args::WebsiteFilter::DotGov => {
225                data = data
226                    .into_iter()
227                    .filter(|n| n.website.ends_with(".gov"))
228                    .collect();
229            }
230            args::WebsiteFilter::DotCom => {
231                data = data
232                    .into_iter()
233                    .filter(|n| n.website.ends_with(".com"))
234                    .collect();
235            }
236            args::WebsiteFilter::DotOrg => {
237                data = data
238                    .into_iter()
239                    .filter(|n| n.website.ends_with(".org"))
240                    .collect();
241            }
242            args::WebsiteFilter::DotNet => {
243                data = data
244                    .into_iter()
245                    .filter(|n| n.website.ends_with(".net"))
246                    .collect();
247            }
248            args::WebsiteFilter::Http => {
249                data = data
250                    .into_iter()
251                    .filter(|n| n.website.starts_with("http:"))
252                    .collect();
253            }
254            args::WebsiteFilter::Https => {
255                data = data
256                    .into_iter()
257                    .filter(|n| n.website.starts_with("https:"))
258                    .collect();
259            }
260        }
261    }
262
263    let mut table = Vec::new();
264
265    for nation in &data {
266        let row = vec![
267            nation.nation.as_str().cell().justify(Justify::Left),
268            nation.website.as_str().cell().justify(Justify::Left),
269        ];
270        table.push(row);
271    }
272
273    let t = table
274        .table()
275        .title(vec!["Name".cell().bold(true), "Website".cell().bold(true)])
276        .display()
277        .unwrap();
278
279    println!("{}", t);
280}
281
282/// Get basic stats about each tribal government
283pub fn stats(filter: &Option<args::WebsiteFilter>) {
284    let mut rdr = csv::Reader::from_path("./tribes.csv")
285        .expect("File tribes.csv does not exist. Run ``tgd update`");
286    let mut data = Vec::new();
287
288    let mut number_of_nations = 0;
289    let mut number_of_websites = 0;
290    let mut result = "".to_owned();
291    let mut percent_websites = "".to_owned();
292    let mut percent_nations = "".to_owned();
293
294    if let Some(f) = filter {
295        for n in rdr.deserialize() {
296            let nation: Nation = n.unwrap();
297            if nation.recognition == "Federal" {
298                number_of_nations += 1;
299
300                if !nation.website.is_empty() {
301                    number_of_websites += 1;
302                }
303            }
304            data.push(nation);
305        }
306
307        match f {
308            args::WebsiteFilter::DotGov => {
309                data = data
310                    .into_iter()
311                    .filter(|n| n.website.ends_with(".gov"))
312                    .collect();
313
314                result.push_str("sites with dot gov domains: ");
315                result.push_str(data.len().to_string().as_str());
316
317                let pw = data.len() * 100 / number_of_websites;
318                let pn = data.len() * 100 / number_of_nations;
319
320                percent_websites.push_str(pw.to_string().as_str());
321                percent_websites.push_str("%");
322                percent_nations.push_str(pn.to_string().as_str());
323                percent_nations.push_str("%");
324            }
325            args::WebsiteFilter::DotCom => {
326                data = data
327                    .into_iter()
328                    .filter(|n| n.website.ends_with(".com"))
329                    .collect();
330
331                result.push_str("sites with dot com domains: ");
332                result.push_str(data.len().to_string().as_str());
333
334                let pw = data.len() * 100 / number_of_websites;
335                let pn = data.len() * 100 / number_of_nations;
336
337                percent_websites.push_str(pw.to_string().as_str());
338                percent_nations.push_str(pn.to_string().as_str());
339            }
340            args::WebsiteFilter::DotOrg => {
341                data = data
342                    .into_iter()
343                    .filter(|n| n.website.ends_with(".org"))
344                    .collect();
345
346                result.push_str("sites with dot org domains: ");
347                result.push_str(data.len().to_string().as_str());
348
349                let pw = data.len() * 100 / number_of_websites;
350                let pn = data.len() * 100 / number_of_nations;
351
352                percent_websites.push_str(pw.to_string().as_str());
353                percent_websites.push_str("%");
354                percent_nations.push_str(pn.to_string().as_str());
355                percent_nations.push_str("%");
356            }
357            args::WebsiteFilter::DotNet => {
358                data = data
359                    .into_iter()
360                    .filter(|n| n.website.ends_with(".net"))
361                    .collect();
362
363                result.push_str("sites with dot net domains: ");
364                result.push_str(data.len().to_string().as_str());
365
366                let pw = data.len() * 100 / number_of_websites;
367                let pn = data.len() * 100 / number_of_nations;
368
369                percent_websites.push_str(pw.to_string().as_str());
370                percent_websites.push_str("%");
371                percent_nations.push_str(pn.to_string().as_str());
372                percent_nations.push_str("%");
373            }
374            args::WebsiteFilter::Http => {
375                data = data
376                    .into_iter()
377                    .filter(|n| n.website.starts_with("http:"))
378                    .collect();
379
380                result.push_str("sites with http: ");
381                result.push_str(data.len().to_string().as_str());
382
383                let pw = data.len() * 100 / number_of_websites;
384                let pn = data.len() * 100 / number_of_nations;
385
386                percent_websites.push_str(pw.to_string().as_str());
387                percent_websites.push_str("%");
388                percent_nations.push_str(pn.to_string().as_str());
389                percent_nations.push_str("%");
390            }
391            args::WebsiteFilter::Https => {
392                data = data
393                    .into_iter()
394                    .filter(|n| n.website.starts_with("https:"))
395                    .collect();
396
397                result.push_str("sites with https: ");
398                result.push_str(data.len().to_string().as_str());
399
400                let pw = data.len() * 100 / number_of_websites;
401                let pn = data.len() * 100 / number_of_nations;
402
403                percent_websites.push_str(pw.to_string().as_str());
404                percent_websites.push_str("%");
405                percent_nations.push_str(pn.to_string().as_str());
406                percent_nations.push_str("%");
407            }
408        }
409    }
410
411    println!("\n");
412    println!("{result}");
413    println!("percent of all websites: {percent_websites}");
414    println!("percent of all nations: {percent_nations}");
415}
tgd/lib.rs

tgd/
lib.rs