from sparkless.testing import get_imports, Mode, get_mode
def _is_sparkless_mode() -> bool:
    """Return True when the active test mode equals ``Mode.SPARKLESS``."""
    active_mode = get_mode()
    return active_mode == Mode.SPARKLESS
class TestColumnAvailability:
    """Verify that DataFrame columns are reported by ``df.columns``.

    Every test asserts against the public ``columns`` attribute. When the
    suite runs in sparkless mode and the DataFrame object exposes the
    private ``_get_available_columns`` method or ``_materialized``
    attribute, those internals are asserted as well; otherwise the extra
    checks are skipped so the same tests can run against other backends.
    """

    @staticmethod
    def _make_id_value_df(spark, imports):
        """Build the one-row ``id``/``value`` DataFrame with an explicit schema.

        Args:
            spark: Session fixture providing ``createDataFrame``.
            imports: Object returned by ``get_imports()``; supplies
                ``StructType``, ``StructField`` and ``IntegerType``.

        Returns:
            A DataFrame holding the single row ``{"id": 1, "value": 10}``
            with both columns declared as nullable integers.
        """
        schema = imports.StructType(
            [
                imports.StructField("id", imports.IntegerType(), True),
                imports.StructField("value", imports.IntegerType(), True),
            ]
        )
        return spark.createDataFrame([{"id": 1, "value": 10}], schema=schema)

    @staticmethod
    def _assert_internally_available(df, *names):
        """Assert each name is in ``df._get_available_columns()``.

        The check runs only in sparkless mode and only when ``df`` has the
        private method; in every other case it is a no-op.
        """
        if not (_is_sparkless_mode() and hasattr(df, "_get_available_columns")):
            return
        available = df._get_available_columns()
        for name in names:
            assert name in available

    @staticmethod
    def _assert_internally_materialized(df):
        """Assert ``df._materialized is True`` in sparkless mode, if present."""
        if _is_sparkless_mode() and hasattr(df, "_materialized"):
            assert df._materialized is True

    def test_materialized_columns_are_available(self, spark):
        """Columns of a freshly created DataFrame are listed immediately."""
        df = spark.createDataFrame([{"id": 1, "value": 10}], ["id", "value"])
        assert "id" in df.columns
        assert "value" in df.columns
        self._assert_internally_available(df, "id", "value")

    def test_columns_available_after_collect(self, spark):
        """A column added by ``withColumn`` is listed after ``collect()``."""
        imports = get_imports()
        F = imports.F
        df = self._make_id_value_df(spark, imports)
        df = df.withColumn("new_col", F.col("value") + 1)
        # The collected rows are not inspected; the call is made only so the
        # column checks below run after an action has been executed.
        df.collect()
        assert "new_col" in df.columns
        self._assert_internally_available(df, "new_col")

    def test_columns_available_after_show(self, spark):
        """A column added by ``withColumn`` is listed after ``show()``."""
        imports = get_imports()
        F = imports.F
        df = self._make_id_value_df(spark, imports)
        df = df.withColumn("new_col", F.col("value") + 1)
        df.show()
        assert "new_col" in df.columns
        self._assert_internally_available(df, "new_col")

    def test_dataframe_is_marked_materialized(self, spark):
        """DataFrames report ``_materialized`` on creation and after collect."""
        imports = get_imports()
        F = imports.F
        df = self._make_id_value_df(spark, imports)
        self._assert_internally_materialized(df)
        df2 = df.withColumn("new", F.col("value") + 1)
        df2.collect()
        assert "new" in df2.columns
        self._assert_internally_materialized(df2)