class TestIssue361CreateDataFrameRdd:
    """Round-trip tests: build a DataFrame from another DataFrame's ``.rdd``.

    Every test creates a source DataFrame, passes ``source.rdd`` back into
    ``spark.createDataFrame`` together with a schema, and checks the result.
    The ``spark`` argument is a session fixture supplied by the test suite.
    """

    def test_createDataFrame_from_rdd_with_schema_list(self, spark):
        """Rows survive an RDD round trip when the schema is a list of names."""
        records = [
            {"Name": "Alice", "Value": 1},
            {"Name": "Bob", "Value": 10},
        ]
        source = spark.createDataFrame(records)

        rebuilt = spark.createDataFrame(source.rdd, schema=["Name", "Value"])
        collected = rebuilt.collect()

        assert len(collected) == 2
        first = collected[0]
        assert first["Name"] == "Alice"
        assert first["Value"] == 1
        second = collected[1]
        assert second["Name"] == "Bob"
        assert second["Value"] == 10

    def test_createDataFrame_from_rdd_show_matches_expected(self, spark):
        """``show()`` runs on the rebuilt DataFrame and the row count is 2.

        The printed output itself is not inspected; ``show()`` is only
        exercised to make sure it does not raise.
        """
        records = [
            {"Name": "Alice", "Value": 1},
            {"Name": "Bob", "Value": 10},
        ]
        source = spark.createDataFrame(records)

        rebuilt = spark.createDataFrame(source.rdd, schema=["Name", "Value"])
        rebuilt.show()

        assert rebuilt.count() == 2

    def test_createDataFrame_from_rdd_empty_dataframe(self, spark):
        """An empty DataFrame round-trips to zero rows with its columns intact."""
        from tests.fixtures.spark_imports import get_spark_imports

        types = get_spark_imports()
        fields = [
            types.StructField("Name", types.StringType()),
            types.StructField("Value", types.IntegerType()),
        ]
        struct = types.StructType(fields)

        # The same explicit schema is used for both the source and the rebuild.
        source = spark.createDataFrame([], schema=struct)
        rebuilt = spark.createDataFrame(source.rdd, schema=struct)

        assert rebuilt.count() == 0
        assert rebuilt.columns == ["Name", "Value"]

    def test_createDataFrame_from_rdd_single_row(self, spark):
        """A one-row DataFrame round-trips with both of its values preserved."""
        source = spark.createDataFrame([{"a": 1, "b": "x"}])

        rebuilt = spark.createDataFrame(source.rdd, schema=["a", "b"])
        collected = rebuilt.collect()

        assert len(collected) == 1
        only = collected[0]
        assert only["a"] == 1
        assert only["b"] == "x"

    def test_createDataFrame_from_rdd_preserves_schema_order(self, spark):
        """Columns come back in the order given by the schema list."""
        column_order = ["x", "y", "z"]
        # The dict keys are deliberately written in the reverse order.
        source = spark.createDataFrame(
            [{"z": 3, "y": 2, "x": 1}],
            schema=["x", "y", "z"],
        )

        rebuilt = spark.createDataFrame(source.rdd, schema=["x", "y", "z"])

        assert rebuilt.columns == column_order
        first = rebuilt.collect()[0]
        assert first["x"] == 1
        assert first["y"] == 2
        assert first["z"] == 3