import pytest
from sparkless.testing import get_imports
# Obtain SparkSession and the column-functions namespace (F) through the
# sparkless testing helper instead of importing them directly.
# NOTE(review): presumably this lets the same tests run against different
# backends -- confirm against get_imports().
_imports = get_imports()
SparkSession = _imports.SparkSession
F = _imports.F
def test_select_dropped_column_raises_proper_error():
    """Selecting a column removed by an earlier ``select`` must fail clearly.

    Builds a one-row DataFrame, derives ``impression_date_parsed`` from
    ``impression_date`` and then projects only three columns, leaving
    ``impression_date`` out. Selecting the left-out column afterwards is
    expected to raise, and the error text must name the missing column and
    at least one of the columns that are still available.
    """
    session = SparkSession.builder.appName("test").getOrCreate()
    rows = [("imp_001", "2024-01-15T10:30:45.123456", "campaign_1")]
    source_df = session.createDataFrame(
        rows, ["impression_id", "impression_date", "campaign_id"]
    )

    # Remove the ".<digits>" part so the text matches the timestamp pattern.
    without_fraction = F.regexp_replace(F.col("impression_date"), r"\.\d+", "")
    parsed_ts = F.to_timestamp(
        without_fraction.cast("string"),
        "yyyy-MM-dd'T'HH:mm:ss",
    )
    with_parsed = source_df.withColumn("impression_date_parsed", parsed_ts)
    projected = with_parsed.select(
        "impression_id",
        "campaign_id",
        "impression_date_parsed",
    )

    remaining = projected.columns
    assert "impression_date" not in remaining
    assert "impression_date_parsed" in remaining

    # The concrete exception class is not visible from this file, so the
    # check stays on the generic base class.
    with pytest.raises(Exception) as exc_info:
        projected.select("impression_date")

    message = str(exc_info.value)
    assert "impression_date" in message
    assert any(name in message for name in ("impression_id", "campaign_id"))
def test_select_dropped_column_minimal_repro():
    """Minimal reproduction: select a column that a prior ``select`` dropped.

    A two-column DataFrame is narrowed to ``col1``; selecting ``col2`` from
    the result is expected to raise, and the error text must mention both
    the missing column (``col2``) and the remaining column (``col1``).
    """
    session = SparkSession.builder.appName("minimal_repro").getOrCreate()
    two_col_df = session.createDataFrame([("a", "b")], ["col1", "col2"])
    only_col1 = two_col_df.select("col1")

    with pytest.raises(Exception) as exc_info:
        only_col1.select("col2")

    message = str(exc_info.value)
    assert "col2" in message
    assert "col1" in message