assist 0.1.0

assist.org rust analysis
Documentation
use std::{collections::HashSet, fs::{File, OpenOptions}, io::{self, BufRead, Write}, sync::{Arc, Mutex}};

use assist::{Agreement, AllDepartment, AllMajor, AllPrefix, Institution, Institutions};
use futures::{stream::FuturesUnordered, StreamExt};
use reqwest::ClientBuilder;
use serde::{Deserialize, Serialize};
use serde_json::{Error, Value};

/// Combined articulation data for one sending/receiving institution pair.
///
/// `main` builds one value per agreement URL and writes it, pretty-printed as JSON, to
/// `./agreements/<sending code>2<receiving code>.json`. Field order here is the key order
/// of that JSON output.
#[derive(Default, Deserialize, Serialize, Clone, Debug)]
struct Endpoints {
    /// Decoded from the `receivingInstitution` field of the API `result` object.
    receiving_institution: Institution,
    /// Decoded from the `sendingInstitution` field of the API `result` object.
    sending_institution: Institution,
    /// Articulations from the `AllMajors` endpoint; `None` when that endpoint did not
    /// report `isSuccessful`.
    all_majors: Option<Vec<AllMajor>>,
    /// Articulations from the `AllDepartments` endpoint; `None` when that endpoint did not
    /// report `isSuccessful`.
    all_departments: Option<Vec<AllDepartment>>,
    /// Articulations from the `AllPrefixes` endpoint; `None` when that endpoint did not
    /// report `isSuccessful`.
    all_prefixes: Option<Vec<AllPrefix>>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let institutions = reqwest::get("https://assist.org/api/institutions").await.unwrap().json::<Vec<Institutions>>().await.unwrap();
    let ids = institutions.clone().into_iter().map(|x| x.id as usize).collect::<Vec<usize>>();
    let vec_size = ids.into_iter().max().unwrap() + 1;
    let mut relations = Vec::with_capacity(vec_size);
    for _ in 0..vec_size {
        relations.push(vec![]);
    };
    let relations = Arc::new(Mutex::new(relations));
    let mut futs = FuturesUnordered::new();
    let mut outputs = Vec::new();
    let threads = 32;
    for i in institutions {
        let relations = Arc::clone(&relations);
        let fut = async move {
            eprintln!("{}", format!("https://assist.org/api/institutions/{}/agreements", i.id));
            let agreements = reqwest::get(format!("https://assist.org/api/institutions/{}/agreements", i.id)).await.unwrap().json::<Vec<Agreement>>().await.unwrap();
            let mut years = vec![];
            for _ in 0..vec_size {
                years.push(0);
            };
            for agree in agreements {
                if agree.sending_year_ids.last().is_some() {
                    years[agree.institution_parent_id as usize] = agree.sending_year_ids.last().unwrap().clone();
                }
            }
            relations.lock().unwrap()[i.id as usize] = years;
        };
        futs.push(fut);

        if futs.len() == threads {
            outputs.push(futs.next().await.unwrap());
        }
    }
    while let Some(item) = futs.next().await {
        outputs.push(item);
    }
    /*for i in 0..=relations.len()-1 {
        println!("{}: {:?}", i, relations[i]);
    }
    let mut pairs = Vec::new();
    for i in 0..=vec_size-1 {
        for j in i..=vec_size-1 {
            if i != j && !relations[i].is_empty() && relations[i][j] != 0 {
                pairs.push(((i, j), relations[i][j]));
            }
        }
    }
    println!("{:#?}", pairs);*/
    let mut urls = Vec::new();
    let mut checked = HashSet::new();
    let relations = relations.lock().unwrap().clone();
    let path = "urls.txt";
    let file = match File::open(&path) {
    Ok(file) => file,
    Err(_) => {File::create_new(&path).unwrap(); File::open(&path).unwrap()},
    };
    let reader = io::BufReader::new(file);

    for line in reader.lines() {
        let line = line.unwrap().clone().trim().to_string();
        // Validate and process the URL
        if line.starts_with("http://") || line.starts_with("https://") {
            checked.insert(line);
        }
    }

    for i in 1..=relations.len()-1 {
        if !relations[i].is_empty() {
            for j in 1..=relations.len()-1 {
                if i != j && relations[i][j] != 0 {
                    if relations[i][j] >= 74 {
                        let url = format!("https://assist.org/api/articulation/Agreements?Key={}/{}/to/{}/AllPrefixes", relations[i][j], j, i);
                        if !checked.contains(&url) {
                            urls.push(url);
                        }
                    } /*else {
                        println!("https://assist.org/api/agreements?receivingInstitutionId={}&sendingInstitutionId={}&academicYearId={}&categoryCode=major", i, j, relations[i][j]);
                    }*/
                }
            }
        }
    };
    eprintln!("Urls prepared");
    let mut futs = FuturesUnordered::new();
    let mut outputs = Vec::new();
    let threads = 32;
    for url in urls {
        let fut = async move {
            let client = ClientBuilder::new().build().unwrap();
            let prefixes_status = client.get(url.clone()).send().await.unwrap().json::<Value>().await.unwrap().get("isSuccessful").unwrap().to_string().parse::<bool>().unwrap();
            let departments_status = client.get(url.clone().replace("AllPrefixes", "AllDepartments")).send().await.unwrap().json::<Value>().await.unwrap().get("isSuccessful").unwrap().to_string().parse::<bool>().unwrap();
            let majors_status = client.get(url.clone().replace("AllPrefixes", "AllMajors")).send().await.unwrap().json::<Value>().await.unwrap().get("isSuccessful").unwrap().to_string().parse::<bool>().unwrap();
            if !prefixes_status && !departments_status && !majors_status {    
                eprintln!("Error: {}", url);
                return;
            };
            let data;
            if prefixes_status {
                data = client.get(url.clone()).send().await.unwrap().json::<Value>().await.unwrap();
            } else if departments_status {
                data = client.get(url.clone().replace("AllPrefixes", "AllDepartments")).send().await.unwrap().json::<Value>().await.unwrap();
            } else {
                data = client.get(url.clone().replace("AllPrefixes", "AllMajors")).send().await.unwrap().json::<Value>().await.unwrap();
            };
            let receiving_institution  = data.get("result").unwrap().get("receivingInstitution").unwrap().to_string().replace("\\\"", "\"").replace("\\\"", "\"");
            let sending_institution  = data.get("result").unwrap().get("sendingInstitution").unwrap().to_string().replace("\\\"", "\"").replace("\\\"", "\"");
            let receiving_institution: Institution = serde_json::from_str(&receiving_institution[1..=receiving_institution.len()-2]).unwrap();
            let sending_institution: Institution = serde_json::from_str(&sending_institution[1..=sending_institution.len()-2]).unwrap();
            let mut endpoints = Endpoints { 
                receiving_institution,
                sending_institution, 
                all_majors: None,
                all_departments: None, 
                all_prefixes: None
            };
            if prefixes_status {
                eprintln!("{}", url.clone());
                let data = client.get(url.clone()).send().await.unwrap().json::<Value>().await.unwrap();
                let json_str = data.get("result").unwrap().get("articulations").unwrap().to_string().replace("\\\"", "\"").replace("\\\"", "\"");
                let articulations: Result<Vec<AllPrefix>, Error> = serde_json::from_str(&json_str[1..=json_str.len()-2]);
                if articulations.is_err() {
                    println!("{}", articulations.unwrap_err());
                    println!("{}", json_str);
                    panic!();
                }
                endpoints.all_prefixes = Some(articulations.unwrap());
            }
            if departments_status {
                eprintln!("{}", url.replace("AllPrefixes", "AllDepartments"));
                let data = client.get(url.clone().replace("AllPrefixes", "AllDepartments")).send().await.unwrap().json::<Value>().await.unwrap();
                let json_str = data.get("result").unwrap().get("articulations").unwrap().to_string().replace("\\\"", "\"").replace("\\\"", "\"");
                let articulations: Result<Vec<AllDepartment>, Error> = serde_json::from_str(&json_str[1..=json_str.len()-2]);
                if articulations.is_err() {
                    println!("{}", articulations.unwrap_err());
                    println!("{}", json_str);
                    panic!();
                }
                endpoints.all_departments = Some(articulations.unwrap());
            }
            if majors_status {
                let data = client.get(url.clone().replace("AllPrefixes", "AllMajors")).send().await.unwrap().json::<Value>().await.unwrap();
                let json_str = data.get("result").unwrap().get("articulations").unwrap().to_string().replace("\\\"", "\"").replace("\\\"", "\"");
                
                eprintln!("{}", url.replace("AllPrefixes", "AllMajors"));
                let articulations: Result<Vec<AllMajor>, Error> = serde_json::from_str(&json_str[1..=json_str.len()-2]);
                if articulations.is_err() {
                    println!("{}", articulations.unwrap_err());
                    println!("{}", json_str);
                    panic!();
                }
                endpoints.all_majors = Some(articulations.unwrap());
            }
            let json_string = serde_json::to_string_pretty(&endpoints).unwrap();
            let mut file = File::create(format!("./agreements/{}2{}.json", endpoints.sending_institution.code.trim().to_lowercase(), endpoints.receiving_institution.code.trim().to_lowercase())).unwrap();
            file.write_all(json_string.as_bytes()).unwrap();
            //agreements.lock().unwrap().push(endpoints);
            let mut file = OpenOptions::new().append(true).open("urls.txt").unwrap();
            file.write_all(url.as_bytes()).unwrap();
            file.write_all("\n".as_bytes()).unwrap();
        };
        futs.push(fut);

        if futs.len() == threads {
            outputs.push(futs.next().await.unwrap());
        }
    }
    while let Some(item) = futs.next().await {
        outputs.push(item);
    }
    eprintln!("Done downloading data");
    Ok(())
}