emval 0.1.12

emval is a blazingly fast email validator.
Documentation
use polars::prelude::*;
use pyo3_polars::derive::polars_expr;
use serde::Deserialize;

use crate::models::EmailValidator;

/// Keyword arguments deserialized for the `validate_email` expression.
///
/// `validate_email` copies every field unchanged into the `EmailValidator`
/// field of the same name; the precise meaning of each option is defined by
/// `EmailValidator`, which lives outside this file.
#[derive(Deserialize)]
struct ValidateEmailKwargs {
    /// Forwarded to `EmailValidator::allow_smtputf8`.
    /// NOTE(review): presumably permits internationalized (SMTPUTF8) addresses — confirm.
    allow_smtputf8: bool,
    /// Forwarded to `EmailValidator::allow_empty_local`.
    /// NOTE(review): presumably permits an empty local part — confirm.
    allow_empty_local: bool,
    /// Forwarded to `EmailValidator::allow_quoted_local`.
    /// NOTE(review): presumably permits a quoted local part — confirm.
    allow_quoted_local: bool,
    /// Forwarded to `EmailValidator::allow_domain_literal`.
    /// NOTE(review): presumably permits bracketed IP-literal domains — confirm.
    allow_domain_literal: bool,
    /// Forwarded to `EmailValidator::deliverable_address`.
    /// NOTE(review): presumably enables a deliverability check — confirm.
    deliverable_address: bool,
    /// Forwarded to `EmailValidator::allowed_special_domains`.
    /// NOTE(review): presumably special-use domains to accept anyway — confirm.
    allowed_special_domains: Vec<String>,
}

/// Output-type callback for the `validate_email` expression.
///
/// The input fields are ignored: the result is always a struct field named
/// `validated` holding five string members (`original`, `normalized`,
/// `local_part`, `domain_address`, `domain_name`) followed by one boolean
/// member (`is_deliverable`).
fn validate_email_struct(_input_fields: &[Field]) -> PolarsResult<Field> {
    // String-typed members, in the order they appear in the output struct.
    const STRING_MEMBERS: [&str; 5] = [
        "original",
        "normalized",
        "local_part",
        "domain_address",
        "domain_name",
    ];

    let mut members: Vec<Field> = STRING_MEMBERS
        .iter()
        .map(|&member| Field::new(member.into(), DataType::String))
        .collect();
    // The only non-string member goes last.
    members.push(Field::new("is_deliverable".into(), DataType::Boolean));

    let schema = DataType::Struct(members);
    Ok(Field::new("validated".into(), schema))
}

/// Validates every value of the first input series as an email address.
///
/// The first series in `inputs` is cast to `String` and each value is passed
/// to an `EmailValidator` configured from `kwargs`. The result is a struct
/// series named `validated` with the members `original`, `normalized`,
/// `local_part`, `domain_address`, `domain_name` (strings) and
/// `is_deliverable` (boolean), one row per input row.
///
/// A row whose input is null, or whose address is rejected by the validator,
/// has every member set to null; the validation error itself is discarded.
/// `domain_address` is additionally null whenever the validator reports no
/// domain address for an otherwise valid email.
///
/// # Errors
///
/// Returns an error if the input cannot be cast to `String`, if the cast
/// result cannot be viewed as a string array, or if the struct series cannot
/// be assembled.
///
/// # Panics
///
/// Panics if `inputs` is empty, because the first element is indexed directly.
#[polars_expr(output_type_func=validate_email_struct)]
pub fn validate_email(inputs: &[Series], kwargs: ValidateEmailKwargs) -> PolarsResult<Series> {
    let input = inputs[0].cast(&DataType::String)?;
    // Propagate a dtype mismatch as a Polars error instead of panicking.
    let ca = input.str()?;

    let email_validator = EmailValidator {
        allow_smtputf8: kwargs.allow_smtputf8,
        allow_empty_local: kwargs.allow_empty_local,
        allow_quoted_local: kwargs.allow_quoted_local,
        allow_domain_literal: kwargs.allow_domain_literal,
        deliverable_address: kwargs.deliverable_address,
        allowed_special_domains: kwargs.allowed_special_domains,
    };

    let mut original_builder = StringChunkedBuilder::new("original".into(), ca.len());
    let mut normalized_builder = StringChunkedBuilder::new("normalized".into(), ca.len());
    let mut local_part_builder = StringChunkedBuilder::new("local_part".into(), ca.len());
    let mut domain_address_builder = StringChunkedBuilder::new("domain_address".into(), ca.len());
    let mut domain_name_builder = StringChunkedBuilder::new("domain_name".into(), ca.len());
    let mut is_deliverable_builder = BooleanChunkedBuilder::new("is_deliverable".into(), ca.len());

    for email in ca.iter() {
        // A null input and a rejected address are treated the same way: both
        // collapse to `None` here and yield an all-null row below.
        match email.and_then(|em| email_validator.validate_email(em).ok()) {
            Some(ve) => {
                original_builder.append_value(ve.original);
                normalized_builder.append_value(ve.normalized);
                local_part_builder.append_value(ve.local_part);
                // The domain address is optional even for a valid email.
                domain_address_builder
                    .append_option(ve.domain_address.map(|ip| ip.to_string()));
                domain_name_builder.append_value(ve.domain_name);
                is_deliverable_builder.append_value(ve.is_deliverable);
            }
            None => {
                original_builder.append_null();
                normalized_builder.append_null();
                local_part_builder.append_null();
                domain_address_builder.append_null();
                domain_name_builder.append_null();
                is_deliverable_builder.append_null();
            }
        }
    }

    // Member order must match the schema declared by `validate_email_struct`.
    let fields = vec![
        original_builder.finish().into_series(),
        normalized_builder.finish().into_series(),
        local_part_builder.finish().into_series(),
        domain_address_builder.finish().into_series(),
        domain_name_builder.finish().into_series(),
        is_deliverable_builder.finish().into_series(),
    ];
    StructChunked::from_series("validated".into(), ca.len(), fields.iter())
        .map(|ca| ca.into_series())
}