1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Regression test for issue #1398: numeric.floor schema parity.

Scenario (from the issue, paraphrased):

    df = session.createDataFrame([(1.9,), (-1.1,), (None,)], ["x"])
    df.select(F.floor("x").alias("out"))

PySpark schema:
    struct<out:bigint>

Sparkless schema (before fix):
    struct<out:double>

This test locks in PySpark parity for the output schema: floor() should yield
an integral type (bigint in PySpark, LongType in Sparkless).
"""
from __future__ import annotations
def test_issue_1398_numeric_floor_schema_is_long(spark, spark_imports) -> None:
    """Verify that ``F.floor`` returns floored values in a LongType column.

    A one-column frame ``x`` holding ``1.9``, ``-1.1`` and ``None`` is
    projected through ``F.floor("x")`` aliased to ``out``. The test then
    checks the collected values and the data type of the single output field.

    Args:
        spark: Session-like object; only ``createDataFrame`` is used here.
        spark_imports: Namespace exposing ``F`` (the functions module) and
            ``LongType``.
    """
    functions = spark_imports.F
    long_type = spark_imports.LongType

    source = spark.createDataFrame(
        [(1.9,), (-1.1,), (None,)],
        ["x"],
    )
    floored_column = functions.floor("x").alias("out")
    floored = source.select(floored_column)

    # Sanity check on values first: floor rounds toward negative infinity
    # (-1.1 -> -2) and a null input stays null.
    collected = floored.collect()
    values = [row["out"] for row in collected]
    assert values == [1, -2, None]

    # Schema parity: the result must be an integral type. Sparkless uses
    # LongType() with simpleString "long", whereas PySpark reports "bigint".
    # Asserting on the type class sidesteps that naming difference; a
    # higher-level parity harness can remap "long" vs "bigint".
    out_field = floored.schema.fields[0]
    assert isinstance(out_field.dataType, long_type)