from __future__ import annotations
from tests.fixtures.spark_imports import get_spark_imports
# Resolve the Spark entry points through the shared test fixture instead of
# importing them directly.
# NOTE(review): presumably this lets the suite switch Spark backends in one
# place -- confirm against tests/fixtures/spark_imports.
_imports = get_spark_imports()
SparkSession = _imports.SparkSession  # session class used by _spark()
F = _imports.F  # functions namespace (col, array_distinct) used by the tests
def _spark() -> SparkSession:
    """Return a session for the "issue_415" app via the builder's getOrCreate()."""
    builder = SparkSession.builder.appName("issue_415")
    return builder.getOrCreate()
def test_array_distinct_string_list() -> None:
    """array_distinct on one array<string> row drops the repeated "a".

    The schema is passed positionally as a DDL string.
    """
    session = _spark()
    rows = [{"arr": ["a", "b", "a"]}]
    frame = session.createDataFrame(rows, "arr array<string>")

    # Build the column expression separately so the select reads cleanly.
    deduped = F.array_distinct(F.col("arr")).alias("arr")
    result = frame.select(deduped).collect()

    assert len(result) == 1
    assert result[0]["arr"] == ["a", "b"]
def test_array_distinct_with_array_string_schema() -> None:
    """array_distinct de-duplicates each row of a two-row array<string> frame.

    The schema is passed through the ``schema=`` keyword as a DDL string.
    """
    session = _spark()
    rows = [{"arr": ["x", "y", "x"]}, {"arr": ["p", "q", "q", "p"]}]
    frame = session.createDataFrame(rows, schema="arr array<string>")

    deduped = F.array_distinct(F.col("arr")).alias("arr")
    result = frame.select(deduped).collect()

    # Expected arrays, in the same row order as the input.
    expected = [["x", "y"], ["p", "q"]]
    assert len(result) == len(expected)
    for row, want in zip(result, expected):
        assert row["arr"] == want