formulate 1.2.0

formulate is a standalone server that listens for web form data submissions.
use regex::Regex;
use rocket::{figment::providers::Env, response::status::BadRequest, serde::Deserialize, Config};
use std::sync::LazyLock;

/// Result type shared by the spam checks in this file: `Ok(())` when a check passes
/// or is skipped, `Err` with a `BadRequest` carrying a message otherwise.
type SpamCheckResult = Result<(), BadRequest<String>>;

/// Configuration structure for spam blocking features.
///
/// Every field is optional; each check in this file is skipped when the field it
/// relies on is absent.
#[derive(Deserialize)]
#[serde(crate = "rocket::serde")]
struct SpamConfig {
    /// A list of domain names which are always allowed in form message submissions.
    /// If set, even if set empty, this will take precedence over the blocklist.
    /// This means that if the domain names listed here do not match those found in
    /// the form message, the submission will be considered spam.
    ///
    /// NOTE(review): the precedence over the blocklist is decided by the caller of
    /// the check functions, which is not visible in this file — confirm.
    allowlist: Option<Vec<String>>,
    /// List of domain names. The presence of *any* of these in the form submission's message body is used
    /// as a positive signal for the message being spam.
    blocklist: Option<Vec<String>>,
    /// Whether to enable spam blocking. If true, the StopForumSpam API will be used to check the contact
    /// email address submitted with the form. If false or unset, no lookup is made.
    blocking: Option<bool>,
}

// fspamlist also has API, but it requires API key.
/// The `email` object of a StopForumSpam (SFS) API response.
#[derive(Deserialize)]
#[serde(crate = "rocket::serde")]
struct SFSEmailResponse {
    /// Email address being queried for.
    value: String,
    /// Whether or not the email appears in the SFS database; any value greater
    /// than zero is treated as a match by the check in this file.
    appears: usize,
    // Further fields of the response that are currently not deserialized:
    // /// Number of times email appears in the SFS database
    // frequency: usize,
    // lastseen: Option<String>,
    // /// Statistically calculated score, based on the last seen date and the number of sightings
    // confidence: Option<f64>,
}

/// Top-level JSON body of a StopForumSpam API response for an email lookup.
///
/// Use of ureq's into_json() prevents us from borrowing for string slices here.
#[derive(Deserialize)]
#[serde(crate = "rocket::serde")]
struct StopForumSpamJsonResponse {
    /// Status flag; the response is only acted upon when this equals `1`.
    success: u8,
    /// Lookup result for the queried email address.
    email: SFSEmailResponse,
    /// Error message, if the API reported one; a response carrying an error is
    /// never treated as a spam match.
    error: Option<String>,
}

/// Loose pattern used to find URL-like text in a form message: an optional
/// `scheme://`, any number of dot-terminated labels, a label joined to a final word
/// by `.` or `:`, then any number of path/query-style segments and an optional
/// trailing `/`.
///
/// Compiled once, on first use; panics at that point if the pattern is invalid.
static URL_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"([\w+]+\:\/\/)?([\w\d-]+\.)*[\w-]+[\.\:]\w+([\/\?\=\&\#\.]?[\w-]+)*\/?")
        .expect("Invalid regular expression provided.")
});

/// Configuration source for the spam checks: Rocket's default figment with the
/// `spam` profile selected, with environment variables prefixed `FORMULATE_SPAM_`
/// merged on top.
///
/// Built once, on first use, and reused afterwards.
// Does not pick up config changes.
static SPAM_CONFIG: LazyLock<rocket::figment::Figment> = LazyLock::new(|| {
    Config::figment()
        .select("spam")
        .merge(Env::prefixed("FORMULATE_SPAM_"))
});

/// Checks the submitted email address against the StopForumSpam (SFS) database.
///
/// The lookup only runs when `blocking` is `true` in the spam configuration; when it
/// is `false` or unset the function returns `Ok(())` without contacting the API.
///
/// The lookup is best effort: if the API cannot be reached, answers with an error
/// status, or returns a body that cannot be parsed, the submission is let through.
/// An `Err` is reserved for a positive match so that an API outage never blocks
/// legitimate submissions.
///
/// # Errors
///
/// Returns a `BadRequest` carrying `error_msg` when SFS reports that `form_email`
/// appears in its database, or carrying the configuration error text when the spam
/// configuration cannot be extracted.
pub fn check_stop_forum_spam(form_email: &str, error_msg: &str) -> SpamCheckResult {
    // Percent-encodes `input` so it can be embedded as a URL query value. Only RFC 3986
    // "unreserved" characters are left as-is; every other byte becomes `%XX`.
    fn percent_encode(input: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut encoded = String::with_capacity(input.len());
        for byte in input.bytes() {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                    encoded.push(char::from(byte));
                }
                _ => {
                    encoded.push('%');
                    encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                    encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
                }
            }
        }
        encoded
    }

    let config = SPAM_CONFIG
        .extract::<SpamConfig>()
        .map_err(|err| BadRequest(err.to_string()))?;

    if config.blocking != Some(true) {
        return Ok(());
    }

    // The address is untrusted form input. Without encoding, a `+` would be decoded as
    // a space and `&`/`#` would cut the value short or inject extra parameters, so the
    // lookup would silently miss (and the spam check be bypassed) for such addresses.
    // NOTE(review): the request goes over plain HTTP; switching to https depends on
    // ureq being built with TLS support — confirm before changing.
    let sfs_api_url = format!(
        "http://api.stopforumspam.org/api?email={}&json",
        percent_encode(form_email)
    );

    // A transport failure or error status is not a spam verdict: let the submission pass.
    let Ok(response) = ureq::post(&sfs_api_url)
        .set("Content-Type", "application/x-www-form-urlencoded")
        .call()
    else {
        return Ok(());
    };

    // Likewise, an unreadable or unexpected body is not a spam verdict.
    let Ok(verdict) = response.into_json::<StopForumSpamJsonResponse>() else {
        return Ok(());
    };

    // Only a well-formed, successful answer about this exact address counts as a match.
    let is_listed = verdict.error.is_none()
        && verdict.success == 1
        && verdict.email.appears > 0
        && verdict.email.value == form_email;

    if is_listed {
        Err(BadRequest(error_msg.to_string()))
    } else {
        Ok(())
    }
}

/// Checks a (form submission) message for matches against the configured blocklist of
/// domain names. If a match is found, an error is returned which ultimately ends form
/// processing. If no match is found or no blocklist is configured, the normal control
/// flow continues.
///
/// Matching is a case-insensitive substring search, because domain names are not case
/// sensitive. Blank blocklist entries are ignored: an empty string is contained in
/// every message and would otherwise reject every submission.
///
/// # Errors
///
/// Returns a `BadRequest` carrying `error_msg` when the message contains a blocklisted
/// domain, or carrying the configuration error text when the spam configuration
/// cannot be extracted.
pub fn check_for_spam_blocklist(form_message: &str, error_msg: &str) -> SpamCheckResult {
    let config = SPAM_CONFIG
        .extract::<SpamConfig>()
        .map_err(|config_err| BadRequest(config_err.to_string()))?;

    let Some(blocklist) = config.blocklist else {
        return Ok(());
    };

    // Lowercase the message once instead of once per blocklist entry.
    let message = form_message.to_lowercase();
    let looks_like_spam = blocklist
        .iter()
        .map(|domain| domain.trim().to_lowercase())
        .any(|domain| !domain.is_empty() && message.contains(&domain));

    if looks_like_spam {
        Err(BadRequest(error_msg.to_string()))
    } else {
        Ok(())
    }
}

/// Checks that every URL-like string in a (form submission) message points at an
/// allowlisted domain.
///
/// Each match of `URL_RE` in the message is reduced to its host part, which must equal
/// an allowlist entry or be a subdomain of one. The comparison is case-insensitive and
/// blank allowlist entries are ignored. Matching on the host, rather than searching
/// the whole URL for the entry, stops look-alikes such as `example.com.evil.net` or
/// `evil.net/?q=example.com` from passing an `example.com` allowlist.
///
/// The check passes when no allowlist is configured, when the message contains no
/// URL-like text, or when the spam configuration cannot be extracted (the check is
/// then skipped rather than reported).
///
/// # Errors
///
/// Returns a `BadRequest` carrying `error_msg` when the message contains URL-like text
/// whose host is not covered by the allowlist.
pub fn check_for_spam_allowlist(form_message: &str, error_msg: &str) -> SpamCheckResult {
    // Returns the host part of a `URL_RE` match: the text after an optional `scheme://`
    // up to the first port, path, query or fragment separator.
    fn host_of(url: &str) -> &str {
        let without_scheme = url.split_once("://").map_or(url, |(_, rest)| rest);
        without_scheme
            .split(['/', '?', '#', ':', '=', '&'])
            .next()
            .unwrap_or(without_scheme)
    }

    // True when `host` is `domain` itself or one of its subdomains.
    fn is_within(host: &str, domain: &str) -> bool {
        host == domain
            || host
                .strip_suffix(domain)
                .is_some_and(|prefix| prefix.ends_with('.'))
    }

    // An unreadable configuration leaves this check disabled instead of failing the request.
    let Ok(config) = SPAM_CONFIG.extract::<SpamConfig>() else {
        return Ok(());
    };
    let Some(allowlist) = config.allowlist else {
        return Ok(());
    };

    // Normalise the entries once, outside the per-URL closure.
    let allowed_domains: Vec<String> = allowlist
        .iter()
        .map(|domain| domain.trim().to_lowercase())
        .filter(|domain| !domain.is_empty())
        .collect();

    let has_only_allowed_urls = URL_RE.find_iter(form_message).all(|url| {
        let host = host_of(url.as_str()).to_lowercase();
        allowed_domains
            .iter()
            .any(|domain| is_within(&host, domain))
    });

    if has_only_allowed_urls {
        Ok(())
    } else {
        Err(BadRequest(error_msg.to_string()))
    }
}