from __future__ import annotations
import robin_sparkless as rs
def _spark():
    """Return the session used by the tests in this module.

    The session comes from the ``robin_sparkless`` builder, configured with
    the app name ``"issue_375"`` and finished with ``get_or_create()``.
    """
    builder = rs.SparkSession.builder()
    named_builder = builder.app_name("issue_375")
    return named_builder.get_or_create()
def test_flat_map() -> None:
    """Check that ``flat_map`` expands each input row into one row per word.

    Two input rows ("a b" and "c d e") are split on whitespace; the collected
    output is expected to hold five rows whose ``word`` values are, in order,
    a, b, c, d, e.
    """

    def split_words(row):
        # One output record per whitespace-separated token of the input row.
        return [{"word": w} for w in row["word"].split()]

    records = [{"word": "a b"}, {"word": "c d e"}]
    columns = [("word", "string")]
    source_df = _spark().createDataFrame(records, schema=columns)

    collected = source_df.flat_map(split_words).collect()

    # Check the row count first, then the actual values and their order.
    assert len(collected) == 5
    observed = [item["word"] for item in collected]
    assert observed == ["a", "b", "c", "d", "e"]
def test_flat_map_empty() -> None:
    """Check that ``flat_map`` with a function returning ``[]`` collects no rows."""
    session = _spark()
    single_row_df = session.createDataFrame(
        [{"x": 1}],
        schema=[("x", "int")],
    )

    # The mapper emits nothing for every input row.
    emptied = single_row_df.flat_map(lambda row: [])

    assert len(emptied.collect()) == 0