from __future__ import annotations
import robin_sparkless as rs
def test_soundex_module_exists() -> None:
    """Check that the ``rs`` module exposes an attribute named ``soundex``."""
    has_soundex = hasattr(rs, "soundex")
    assert has_soundex
def test_with_column_soundex_returns_three_rows() -> None:
    """Add a ``snd`` column via ``rs.soundex`` and check the collected rows.

    Three input names must yield three rows, each containing both ``name``
    and ``snd``, where ``snd`` is a string of length 4.
    """
    session = rs.SparkSession.builder().app_name("soundex").get_or_create()
    rows = [{"name": "Alice"}, {"name": "Bob"}, {"name": "Robert"}]
    columns = [("name", "string")]

    # Use the public factory when present (and truthy); otherwise fall back
    # to the underscore-prefixed variant, which must then exist.
    make_frame = getattr(session, "create_dataframe_from_rows", None)
    if not make_frame:
        make_frame = getattr(session, "_create_dataframe_from_rows")

    frame = make_frame(rows, columns)
    soundex_expr = rs.soundex(rs.col("name"))
    collected = frame.with_column("snd", soundex_expr).collect()

    assert len(collected) == 3
    for record in collected:
        assert "name" in record
        assert "snd" in record
        code = record["snd"]
        assert isinstance(code, str)
        assert len(code) == 4
def test_soundex_phonetic_codes() -> None:
    """Check the exact ``rs.soundex`` output for two known names.

    ``"Alice"`` is expected to map to ``"A420"`` and ``"Robert"`` to
    ``"R163"``.
    """
    session = rs.SparkSession.builder().app_name("soundex").get_or_create()
    rows = [{"name": "Alice"}, {"name": "Robert"}]
    columns = [("name", "string")]

    # Use the public factory when present (and truthy); otherwise fall back
    # to the underscore-prefixed variant, which must then exist.
    make_frame = getattr(session, "create_dataframe_from_rows", None)
    if not make_frame:
        make_frame = getattr(session, "_create_dataframe_from_rows")

    frame = make_frame(rows, columns)
    collected = frame.with_column("snd", rs.soundex(rs.col("name"))).collect()

    # Index the computed codes by their source name for direct lookup.
    code_by_name = dict((record["name"], record["snd"]) for record in collected)

    assert code_by_name["Alice"] == "A420"
    assert code_by_name["Robert"] == "R163"