import polars as pl
import pytest
from emval import EmailValidator
from emval.polars import validate_email
@pytest.fixture
def validator():
    """Provide an ``EmailValidator`` built from the keyword settings below."""
    settings = {
        "allow_smtputf8": True,
        "allow_empty_local": False,
        "allow_quoted_local": False,
        "allow_domain_literal": False,
        "deliverable_address": True,
        "allowed_special_domains": [],
    }
    return EmailValidator(**settings)
def test_valid_email_validation(validator):
    """Run ``validate_email`` over a one-row frame holding ``user@example.com``.

    The ``original``, ``normalized`` and ``is_deliverable`` struct fields are
    unpacked into their own columns; ``original`` and ``is_deliverable`` are
    then asserted to be null for that row. The ``validator`` fixture is
    requested but not referenced in the body.
    """
    # NOTE(review): despite the "valid" in the test name, the assertions expect
    # null fields for user@example.com -- presumably example.com is rejected
    # when deliverable_address=True; confirm against the emval behaviour.
    options = {
        "allow_smtputf8": True,
        "allow_empty_local": False,
        "allow_quoted_local": False,
        "allow_domain_literal": False,
        "deliverable_address": True,
        "allowed_special_domains": [],
    }
    frame = pl.DataFrame({"email": ["user@example.com"]})
    frame = frame.with_columns(
        validated=validate_email(pl.col("email"), **options)
    )

    validated = pl.col("validated")
    field_names = ("original", "normalized", "is_deliverable")
    frame = frame.with_columns(
        **{name: validated.struct.field(name) for name in field_names}
    )

    original_values = frame.get_column("original")
    deliverable_flags = frame.get_column("is_deliverable")
    assert original_values[0] is None
    assert deliverable_flags[0] is None
def test_invalid_email_validation(validator):
    """Check the struct produced for the input string ``invalid-email``.

    Both the ``original`` and ``is_deliverable`` fields of the single row are
    expected to be null. The ``validator`` fixture is requested but not
    referenced in the body.
    """
    settings = {
        "allow_smtputf8": True,
        "allow_empty_local": False,
        "allow_quoted_local": False,
        "allow_domain_literal": False,
        "deliverable_address": True,
        "allowed_special_domains": [],
    }
    validated = pl.col("validated")
    outcome = (
        pl.DataFrame({"email": ["invalid-email"]})
        .with_columns(validated=validate_email(pl.col("email"), **settings))
        .with_columns(
            original=validated.struct.field("original"),
            is_deliverable=validated.struct.field("is_deliverable"),
        )
    )

    # Same order as the fields were unpacked: "original" is checked first.
    for column in ("original", "is_deliverable"):
        assert outcome.get_column(column)[0] is None
def test_empty_email_validation(validator):
    """Check the struct produced when the email column holds an empty string.

    The ``original`` and ``is_deliverable`` fields of the single row are
    asserted to be null. The ``validator`` fixture is requested but not
    referenced in the body.
    """
    emails = pl.DataFrame({"email": [""]})
    validation_expr = validate_email(
        pl.col("email"),
        allow_smtputf8=True,
        allow_empty_local=False,
        allow_quoted_local=False,
        allow_domain_literal=False,
        deliverable_address=True,
        allowed_special_domains=[],
    )
    with_struct = emails.with_columns(validated=validation_expr)

    struct_col = pl.col("validated")
    unpacked = with_struct.with_columns(
        original=struct_col.struct.field("original"),
        is_deliverable=struct_col.struct.field("is_deliverable"),
    )

    first_original = unpacked.get_column("original")[0]
    assert first_original is None
    first_deliverable = unpacked.get_column("is_deliverable")[0]
    assert first_deliverable is None
def test_multiple_emails_validation(validator):
    """Validate a four-row frame and check ``is_deliverable`` row by row.

    The frame holds ``user1@example.com``, ``invalid-email``,
    ``user2@domain.com`` and an empty string. Only the third row is expected
    to yield ``True``; the other rows are expected to be null. The
    ``validator`` fixture is requested but not referenced in the body.
    """
    addresses = ["user1@example.com", "invalid-email", "user2@domain.com", ""]
    options = {
        "allow_smtputf8": True,
        "allow_empty_local": False,
        "allow_quoted_local": False,
        "allow_domain_literal": False,
        "deliverable_address": True,
        "allowed_special_domains": [],
    }

    table = pl.DataFrame({"email": addresses})
    table = table.with_columns(
        validated=validate_email(pl.col("email"), **options)
    )
    validated = pl.col("validated")
    table = table.with_columns(
        original=validated.struct.field("original"),
        is_deliverable=validated.struct.field("is_deliverable"),
    )

    assert len(table) == 4

    # Identity checks, row by row: null, null, True, null.
    flags = table.get_column("is_deliverable")
    expected_flags = (None, None, True, None)
    for row, expected in enumerate(expected_flags):
        assert flags[row] is expected