uda-connector 0.1.6

A Rust lib to access UDA (unicycling registration software)
Documentation
use crate::error::UdaError;
use crate::error::UdaError::{CantReadPageContent, ConnectionFailed};
use crate::Result;
use log::warn;
use reqwest::Client;
use scraper::{ElementRef, Html, Selector};
use uda_dto::uda_instance::Instance;

/// Download the instances listing page and parse it into a list of [`Instance`]s.
///
/// Performs a `GET` on `instances_list_url` with the given `client`, then hands the
/// response body to `get_uda_instances_from_html`.
///
/// # Errors
///
/// * `ConnectionFailed` when the request cannot be sent or fails.
/// * `CantReadPageContent` when the response status is not a success, or when the
///   body cannot be read as text.
/// * Any error returned by `get_uda_instances_from_html`.
pub async fn retrieve_uda_instances(
    client: &Client,
    instances_list_url: &str,
) -> Result<Vec<Instance>> {
    let request = client.get(instances_list_url);
    let response = match request.send().await {
        Ok(response) => response,
        // The underlying transport error is intentionally discarded.
        Err(_) => return Err(ConnectionFailed),
    };

    // Any non-2xx answer is reported as an unreadable page.
    if !response.status().is_success() {
        return Err(CantReadPageContent);
    }

    let page = response.text().await.map_err(|_| CantReadPageContent)?;
    get_uda_instances_from_html(&page)
}

/// Extract every instance found in the HTML page `body`.
///
/// Each `<tr>` element of the document is passed to `get_uda_instance_from_row`;
/// rows for which it yields `None` are dropped, the others are kept in document order.
///
/// # Errors
///
/// Returns the `UdaError` converted from the selector parse error if the `tr`
/// selector cannot be built.
fn get_uda_instances_from_html(body: &str) -> Result<Vec<Instance>> {
    let row_selector = Selector::parse(r"tr").map_err(UdaError::from)?;
    let document = Html::parse_document(body);

    let instances = document
        .select(&row_selector)
        .filter_map(|row| get_uda_instance_from_row(&row))
        .collect();

    Ok(instances)
}

/// Build an [`Instance`] from one `<tr>` element of the instances table.
///
/// A data row must contain exactly three `<td>` cells, in this order:
/// the subdomain (holding an `<a>` link), the name, and the creation date
/// (which is ignored).
///
/// Returns `None` when:
/// * the row has no `<td>` cell at all (e.g. a header row made of `<th>` cells) —
///   skipped silently;
/// * the row has a number of `<td>` cells other than three — a warning is logged;
/// * the subdomain cell has no `<a>` element, the link has no `href` attribute,
///   or either the link or the name cell has no text node.
fn get_uda_instance_from_row(row: &ElementRef) -> Option<Instance> {
    let cell_selector = Selector::parse("td").ok()?;
    let cells = row.select(&cell_selector).collect::<Vec<_>>();

    let (subdomain_cell, name_cell) = match cells[..] {
        [subdomain_cell, name_cell, _creation_date] => (subdomain_cell, name_cell),
        // A row without any `<td>` is not a malformed instance: the caller selects
        // every `<tr>`, including the `<thead>` one that only holds `<th>` cells.
        // Warning about it would add noise to every successful parse.
        [] => return None,
        _ => {
            warn!("Ignoring instance because wrongly formatted [row: {row:?}]");
            return None;
        }
    };

    let link_selector = Selector::parse("a").ok()?;
    let link_element = subdomain_cell.select(&link_selector).next()?;
    let link = link_element.attr("href")?;
    // Only the first text node of each element is used.
    let slug = link_element.text().next()?;
    let name = name_cell.text().next()?;

    Some(Instance::new(
        slug.to_owned(),
        name.to_owned(),
        link.to_owned(),
    ))
}

/// Sample HTML of the instances listing page, used as a fixture.
///
/// The table has one header row (`<th>` cells) and two data rows, each with three
/// `<td>` cells: a linked subdomain, a description and a creation date.
/// `get_expected_instances` returns the instances matching these two rows.
///
/// Only compiled for tests or when the `test` feature is enabled.
#[cfg(any(test, feature = "test"))]
pub const BODY: &str = r##"<html class=""><head></head><body><div id="container"><div id="main"><h1>Existing Conventions</h1><table><thead><tr><th>Subdomain</th><th>Description</th><th>Created At</th></tr></thead><tbody><tr><td><a href="https://mt-ventoux-2023.reg.unicycling-software.com">mt-ventoux-2023</a></td><td>Mount Ventoux Unicycle Challenge</td><td>Wed, 03 May 2023 15:12:52 -0500</td></tr><tr><td><a href="https://cfm2025.reg.unicycling-software.com">cfm2025</a></td><td>CFM 2025</td><td>Mon, 27 Jan 2025 13:13:52 -0600</td></tr></tbody></table><hr><a class="button" href="/tenants/new?locale=en">New Convention</a></div></div></body></html>"##;
/// Instances that parsing `BODY` is expected to produce, in document order.
///
/// Only compiled for tests or when the `test` feature is enabled.
#[cfg(any(test, feature = "test"))]
pub fn get_expected_instances() -> Vec<Instance> {
    // (slug, name, url) for each data row of `BODY`.
    const EXPECTED: [(&str, &str, &str); 2] = [
        (
            "mt-ventoux-2023",
            "Mount Ventoux Unicycle Challenge",
            "https://mt-ventoux-2023.reg.unicycling-software.com",
        ),
        (
            "cfm2025",
            "CFM 2025",
            "https://cfm2025.reg.unicycling-software.com",
        ),
    ];

    EXPECTED
        .iter()
        .map(|&(slug, name, url)| Instance::new(slug.to_owned(), name.to_owned(), url.to_owned()))
        .collect()
}

/// Unit tests, grouped in one sub-module per tested function.
#[cfg(test)]
pub mod tests {
    /// Same page as `BODY`, except that the second data row lacks its third
    /// (creation date) cell.
    const MALFORMED_BODY: &str = r##"<html class=""><head></head><body><div id="container"><div id="main"><h1>Existing Conventions</h1><table><thead><tr><th>Subdomain</th><th>Description</th><th>Created At</th></tr></thead><tbody><tr><td><a href="https://mt-ventoux-2023.reg.unicycling-software.com">mt-ventoux-2023</a></td><td>Mount Ventoux Unicycle Challenge</td><td>Wed, 03 May 2023 15:12:52 -0500</td></tr><tr><td><a href="https://cfm2025.reg.unicycling-software.com">cfm2025</a></td><td>CFM 2025</td></tr></tbody></table><hr><a class="button" href="/tenants/new?locale=en">New Convention</a></div></div></body></html>"##;

    mod retrieve_uda_instances {
        use crate::error::UdaError;
        use crate::instances::{get_expected_instances, retrieve_uda_instances, BODY};
        use crate::tools::tests::build_client;
        use reqwest::header::LOCATION;
        use wiremock::matchers::{method, path};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        #[tokio::test]
        async fn success() {
            let mock_server = MockServer::start().await;

            Mock::given(method("GET"))
                .and(path("tenants"))
                .respond_with(ResponseTemplate::new(200).set_body_string(BODY))
                .mount(&mock_server)
                .await;

            let client = build_client().unwrap();
            let instances_list_url = format!("{}/tenants?locale=en", mock_server.uri());
            let instances = retrieve_uda_instances(&client, &instances_list_url)
                .await
                .unwrap();

            assert_eq!(get_expected_instances(), instances);
        }

        #[tokio::test]
        async fn fail_when_connection_failed() {
            let mock_server = MockServer::start().await;

            // Every request is answered with a 301 pointing back at the same path.
            // NOTE(review): presumably the client gives up on the redirect loop, which
            // surfaces as a send error — depends on `build_client`'s redirect policy.
            Mock::given(method("GET"))
                .and(path("tenants"))
                .respond_with(
                    ResponseTemplate::new(301).append_header(LOCATION, "/tenants?locale=en"),
                )
                .mount(&mock_server)
                .await;

            let client = build_client().unwrap();
            let instances_list_url = format!("{}/tenants?locale=en", mock_server.uri());
            let error = retrieve_uda_instances(&client, &instances_list_url)
                .await
                .unwrap_err();

            assert!(matches!(error, UdaError::ConnectionFailed));
        }

        #[tokio::test]
        async fn fail_when_cant_read_page_content() {
            let mock_server = MockServer::start().await;

            Mock::given(method("GET"))
                .and(path("tenants"))
                .respond_with(ResponseTemplate::new(500))
                .mount(&mock_server)
                .await;

            let client = build_client().unwrap();
            let instances_list_url = format!("{}/tenants?locale=en", mock_server.uri());
            let error = retrieve_uda_instances(&client, &instances_list_url)
                .await
                .unwrap_err();

            assert!(matches!(error, UdaError::CantReadPageContent));
        }
    }

    mod get_uda_instances_from_html {
        use crate::instances::tests::MALFORMED_BODY;
        use crate::instances::{
            get_expected_instances, get_uda_instances_from_html, Instance, BODY,
        };

        #[test]
        fn success() {
            let instances = get_uda_instances_from_html(BODY).unwrap();
            assert_eq!(get_expected_instances(), instances);
        }

        #[test]
        fn get_only_one_out_of_two_when_one_malformed() {
            let expected_instance = Instance::new(
                "mt-ventoux-2023".to_owned(),
                "Mount Ventoux Unicycle Challenge".to_owned(),
                "https://mt-ventoux-2023.reg.unicycling-software.com".to_owned(),
            );

            let instances = get_uda_instances_from_html(MALFORMED_BODY).unwrap();
            assert_eq!(vec![expected_instance], instances);
        }
    }

    mod get_uda_instance_from_row {
        use crate::instances::{get_uda_instance_from_row, Instance};
        use scraper::{Html, Selector};

        /// Parse `body`, take its first `<tr>` and run `get_uda_instance_from_row` on it.
        ///
        /// Panics if `body` contains no `<tr>` element.
        fn instance_from_first_row(body: &str) -> Option<Instance> {
            let selector = Selector::parse(r"tr").unwrap();
            let document = Html::parse_document(body);

            let row = document.select(&selector).next().unwrap();
            get_uda_instance_from_row(&row)
        }

        #[test]
        fn success() {
            let body = r#"<html><head></head><body><table><tr><td><a href="https://cfm2025.reg.unicycling-software.com">cfm2025</a></td><td>CFM 2025</td><td>Mon, 27 Jan 2025 13:13:52 -0600</td></tr></tbody></table></body></html>"#;

            let instance = instance_from_first_row(body).unwrap();
            assert_eq!("cfm2025", instance.slug());
            assert_eq!("CFM 2025", instance.name());
            assert_eq!(
                "https://cfm2025.reg.unicycling-software.com",
                instance.url()
            );
        }

        #[test]
        fn fail_when_missing_cells() {
            let body = r#"<html><head></head><body><table><tr><td><a href="https://cfm2025.reg.unicycling-software.com">cfm2025</a></td><td>CFM 2025</td></tr></tbody></table></body></html>"#;

            assert!(instance_from_first_row(body).is_none());
        }

        #[test]
        fn fail_when_too_many_cells() {
            let body = r#"<html><head></head><body><table><tr><td><a href="https://cfm2025.reg.unicycling-software.com">cfm2025</a></td><td>CFM 2025</td><td>Mon, 27 Jan 2025 13:13:52 -0600</td><td>Just another cell</td></tr></tbody></table></body></html>"#;

            assert!(instance_from_first_row(body).is_none());
        }

        #[test]
        fn fail_when_no_link_element() {
            let body = r#"<html><head></head><body><table><tr><td>cfm2025</td><td>CFM 2025</td><td>Mon, 27 Jan 2025 13:13:52 -0600</td></tr></tbody></table></body></html>"#;

            assert!(instance_from_first_row(body).is_none());
        }

        #[test]
        fn fail_when_no_href_attr() {
            let body = r#"<html><head></head><body><table><tr><td><a>cfm2025</a></td><td>CFM 2025</td><td>Mon, 27 Jan 2025 13:13:52 -0600</td></tr></tbody></table></body></html>"#;

            assert!(instance_from_first_row(body).is_none());
        }

        #[test]
        fn fail_when_no_name() {
            let body = r#"<html><head></head><body><table><tr><td><a href="https://cfm2025.reg.unicycling-software.com">cfm2025</a></td><td></td><td>Mon, 27 Jan 2025 13:13:52 -0600</td></tr></tbody></table></body></html>"#;

            assert!(instance_from_first_row(body).is_none());
        }

        #[test]
        fn fail_when_no_slug() {
            let body = r#"<html><head></head><body><table><tr><td><a href="https://cfm2025.reg.unicycling-software.com"></a></td><td>CFM 2025</td><td>Mon, 27 Jan 2025 13:13:52 -0600</td></tr></tbody></table></body></html>"#;

            assert!(instance_from_first_row(body).is_none());
        }
    }
}