1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""
Regression test for issue #1383: orderby.null_default parity.
PySpark scenario (from the issue):

    def scenario_null_sort_default(session):
        df = session.createDataFrame([(None,), (1,), (0,)], ["x"])
        return df.orderBy("x")

This test exercises the same scenario against sparkless, ensuring that:
- ``df.orderBy("x")`` does not raise.
- The resulting schema's ``simpleString()`` matches the expected struct form.
- ``df.explain()`` returns a non-empty plan string (no blank UI).
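- The default (ascending) sort returns every input value, with the non-null
  values in ascending order. PySpark places NULL values first for this
  ordering, but the test deliberately does not assert the exact NULL position.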
"""
def test_issue_1383_orderby_null_default_schema_ui_and_data(spark) -> None:
    """Check ``orderBy("x")`` on a column containing a NULL (issue #1383).

    Verifies the sorted frame's schema string, that ``explain()`` yields
    non-blank text, and that the collected values are complete with the
    non-null ones ascending. Where the NULL lands is intentionally not pinned.
    """
    input_rows = [(None,), (1,), (0,)]
    # Building and sorting in one chain: neither step is expected to raise.
    ordered = spark.createDataFrame(input_rows, ["x"]).orderBy("x")

    # The schema of the sorted frame must render as struct<x:long>.
    assert ordered.schema.simpleString() == "struct<x:long>"

    # explain() has to hand back the plan as a string that is not just
    # whitespace (guards against a blank plan in the UI).
    plan_text = ordered.explain()
    assert isinstance(plan_text, str)
    assert plan_text.strip()

    # Nothing may be lost or invented by the sort ...
    observed = [record["x"] for record in ordered.collect()]
    assert set(observed) == {None, 0, 1}

    # ... and, ignoring the NULL, the remaining values must be ascending.
    present = [value for value in observed if value is not None]
    assert present == sorted(present)