1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
use std::collections::HashMap;

use chrono::{DateTime, Utc};
use log::error;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use url::Url;

mod error;
mod parse_citizen_other;
mod parse_citizen_profile;
mod parse_org;
mod parse_web_bio;

pub use error::*;
use parse_citizen_other::*;
use parse_citizen_profile::*;
use parse_org::*;
use parse_web_bio::*;

/// A citizen record as assembled by [`parse_citizen_record`].
///
/// Field names are serialized/deserialized in camelCase
/// (e.g. `citizen_record_number` becomes `citizenRecordNumber`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Citizen {
    /// Moniker taken from the parsed profile info.
    pub moniker: String,
    /// Handle taken from the parsed profile info.
    pub handle: String,
    /// Title (icon and text) taken from the parsed profile info.
    pub title: Title,
    /// Enlistment timestamp, in UTC.
    pub enlisted: DateTime<Utc>,
    /// Citizen record number; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citizen_record_number: Option<u64>,
    /// URL of the avatar taken from the parsed profile info.
    pub avatar: Url,
    /// Location entries; defaults to empty when absent on deserialization and
    /// is left out of the serialized output when empty.
    // NOTE(review): presumably ordered from broadest to most specific part — confirm in `parse_citizen_other`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub location: Vec<String>,
    /// Languages listed for the citizen; defaults to empty when absent on
    /// deserialization and is left out of the serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub fluency: Vec<String>,
    /// Website taken from the parsed web/bio section; left out of the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub website: Option<String>,
    /// Bio text taken from the parsed web/bio section; defaults to empty when
    /// absent on deserialization and is left out of the serialized output when empty.
    // NOTE(review): presumably one entry per line or paragraph — confirm in `parse_web_bio`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub bio: Vec<String>,
}

/// The title attached to a [`Citizen`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Title {
    /// URL of the icon associated with the title.
    pub icon: Url,
    /// Text of the title.
    pub value: String,
}

/// A single organization membership.
///
/// Variant names are serialized/deserialized in camelCase
/// (`visible`, `redacted`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Organization {
    /// An organization whose details are available.
    Visible {
        /// URL of the organization's logo.
        // NOTE(review): the `Box` presumably keeps this variant (and so the enum) small — confirm intent.
        logo: Box<Url>,
        /// Name of the organization.
        name: String,
        /// Organization identifier.
        // NOTE(review): presumably the short unique ID used in organization URLs — confirm in `parse_org`.
        sid: String,
        /// URL of the organization.
        url: Box<Url>,
        /// Rank held within the organization.
        rank: OrganizationRank,
    },
    /// An organization that carries no details.
    // NOTE(review): presumably a membership hidden on the source page — confirm in `parse_org`.
    Redacted,
}

/// Rank held inside a [`Organization::Visible`] organization.
#[derive(Debug, Serialize, Deserialize)]
pub struct OrganizationRank {
    /// Name of the rank.
    pub name: String,
    /// Numeric value of the rank.
    // NOTE(review): scale and direction (is higher better?) are not visible here — confirm in `parse_org`.
    pub value: u8,
}

/// The organization memberships produced by [`parse_organizations`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Organizations {
    /// The main organization, if any; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub main: Option<Organization>,
    /// Affiliate organizations; defaults to empty when absent on deserialization
    /// and is left out of the serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub affiliates: Vec<Organization>,
}

/// Crate-internal classification of a parsed organization.
///
/// Used as the hash-map key that groups organizations in
/// `parse_organizations`, hence the `Eq`/`Hash`/`Clone` derives.
#[derive(PartialEq, Eq, Hash, Clone)]
pub(crate) enum OrganizationType {
    /// The main organization; `parse_organizations` rejects more than one.
    Main,
    /// An affiliate organization; any number may be present.
    Affiliate,
}

/// Parses the HTML of a citizen page into a [`Citizen`] record.
///
/// The first `div.profile-content` element of the document is used as the
/// root for every sub-parser (web/bio, profile info, other data, record id).
///
/// # Errors
///
/// Returns a citizen scraping error (and logs it) when the document contains
/// no `div.profile-content` element, and propagates any error raised by the
/// web/bio, profile-info, or other-data parsers, in that order.
pub fn parse_citizen_record<'a>(html_data: &str) -> Result<Citizen, ScScrapingError<'a>> {
    let document = Html::parse_document(html_data);
    let profile_selector = Selector::parse("div.profile-content").unwrap();

    // Everything below is scoped to the first matching profile container.
    let profile_root = match document.select(&profile_selector).next() {
        Some(node) => node,
        None => {
            let message = "Citizen content is empty";
            error!("{message}");
            return Err(ScScrapingError::citizen(message));
        }
    };

    // Fallible sections first, in a fixed order, so the first failure wins.
    let web_bio = parse_web_bio(&profile_root)?;
    let profile_info = parse_profile_info(&profile_root)?;
    let other_data = parse_other(&profile_root)?;

    // The record number is optional and only looked up once the rest succeeded.
    let citizen_record_number = parse_citizen_record_id(&profile_root);

    Ok(Citizen {
        moniker: profile_info.moniker,
        handle: profile_info.handle,
        title: profile_info.title,
        enlisted: other_data.enlisted,
        citizen_record_number,
        avatar: profile_info.avatar,
        location: other_data.location,
        fluency: other_data.languages,
        website: web_bio.website,
        bio: web_bio.bio,
    })
}

pub fn parse_organizations<'a>(
    html_data: &str,
) -> Result<Option<Organizations>, ScScrapingError<'a>> {
    let html = Html::parse_document(html_data);

    let org_selector = Selector::parse("div.box-content.org").unwrap();

    let mut map = HashMap::new();

    for e in html.select(&org_selector) {
        let (org_type, org) = parse_org(&e)?;

        if !map.contains_key(&org_type) {
            map.insert(org_type.clone(), Vec::new());
        }

        map.get_mut(&org_type).unwrap().push(org);
    }

    if map.is_empty() {
        return Ok(None);
    }

    if map.contains_key(&OrganizationType::Main) {
        if let Some(main) = map.get(&OrganizationType::Main) {
            if main.len() > 1 {
                let message = "Cannot have multiple main organizations";
                error!("{message}");
                return Err(ScScrapingError::organization(message));
            }
        }
    }

    let orgs = Organizations {
        main: map
            .remove(&OrganizationType::Main)
            .map(|mut o| o.pop().unwrap()),

        affiliates: map.remove(&OrganizationType::Affiliate).unwrap_or_default(),
    };

    Ok(Some(orgs))
}