1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Regression test for issue #1395: string.concat_ws parity.
PySpark scenario (from the issue body, paraphrased):
lambda session: session.createDataFrame(
[("a", "b"), ("a", None), (None, "c")],
["a", "b"],
)
Expected PySpark behavior for:
df.select(F.concat_ws("-", "a", "b").alias("out"))
is:
[{'out': 'a-b'}, {'out': 'a'}, {'out': 'c'}]
Sparkless previously produced:
[{'out': 'a-b'}, {'out': None}, {'out': None}]
This test locks in the PySpark semantics:
- nulls are ignored (dropped) from concat_ws arguments;
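- rows with all-null inputs are not exercised by this test (note: PySpark's
  concat_ws returns an empty string for such rows; the result is null only
  when the separator itself is null).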
"""
from __future__ import annotations
def test_issue_1395_string_concat_ws_null_handling(spark, spark_imports) -> None:
    """Check that ``concat_ws`` skips null arguments instead of nulling the row.

    Builds a two-column frame holding one fully populated row and two rows
    that each contain a single null, joins both columns with ``"-"``, and
    compares the collected rows against the expected output.

    Args:
        spark: Object exposing ``createDataFrame`` (presumably the session
            fixture supplied by the test suite).
        spark_imports: Object whose ``F`` attribute provides ``concat_ws``
            (presumably the functions-module fixture).
    """
    F = spark_imports.F
    df = spark.createDataFrame(
        [("a", "b"), ("a", None), (None, "c")],
        ["a", "b"],
    )
    out = df.select(F.concat_ws("-", "a", "b").alias("out"))

    # Compare the ``asDict()`` mappings directly; copying them item by item
    # adds nothing to an equality check.
    rows = [r.asDict() for r in out.collect()]

    assert rows == [
        {"out": "a-b"},
        {"out": "a"},  # null second argument: no trailing separator
        {"out": "c"},  # null first argument: no leading separator
    ]