import robin_sparkless as rs
def test_cast_empty_and_whitespace_string_to_int() -> None:
    """Check that empty and whitespace-only strings cast to int come back as None.

    The original ``text`` column must be left untouched by the cast.
    """
    session = rs.SparkSession.builder().app_name("test").get_or_create()
    blank_inputs = ["", " "]
    frame = session.createDataFrame(
        [{"text": value} for value in blank_inputs],
        [("text", "string")],
    )
    collected = frame.with_column("n", rs.col("text").cast("int")).collect()
    assert len(collected) == 2
    # Rows are compared positionally against the inputs they were built from.
    for row, original in zip(collected, blank_inputs):
        assert row["text"] == original
        assert row["n"] is None
def test_cast_invalid_strings_to_int_null() -> None:
    """Check that unparseable strings cast to int give None while "42" gives 42."""
    session = rs.SparkSession.builder().app_name("test").get_or_create()
    frame = session.createDataFrame(
        [{"s": "hello"}, {"s": "abc123"}, {"s": ""}, {"s": "42"}],
        [("s", "string")],
    )
    collected = frame.with_column("n", rs.col("s").cast("int")).collect()
    assert len(collected) == 4
    # The first three inputs ("hello", "abc123", "") are not valid integers.
    for position in (0, 1, 2):
        assert collected[position]["n"] is None
    # The final input is a well-formed integer literal.
    assert collected[3]["n"] == 42
def test_try_cast_invalid_to_int_null() -> None:
    """Check that ``try_cast`` to int parses "1" as 1 and maps "x" to None."""
    spark = rs.SparkSession.builder().app_name("test").get_or_create()
    df = spark.createDataFrame(
        [{"s": "1"}, {"s": "x"}],
        [("s", "string")],
    )
    result = df.with_column("n", rs.try_cast(rs.col("s"), "int"))
    rows = result.collect()
    # Pin the row count before indexing: a dropped row would otherwise show up
    # as an IndexError, and an extra row would go completely unnoticed.
    assert len(rows) == 2
    assert rows[0]["n"] == 1
    assert rows[1]["n"] is None
def test_cast_valid_string_to_long() -> None:
    """Check that casting to long parses a large numeric string and nulls a bad one."""
    spark = rs.SparkSession.builder().app_name("test").get_or_create()
    df = spark.createDataFrame(
        # 9999999999 exceeds the signed 32-bit maximum (2147483647), so the
        # expected value below can only be held by a 64-bit "long" column.
        [{"s": "9999999999"}, {"s": "bad"}],
        [("s", "string")],
    )
    result = df.with_column("n", rs.col("s").cast("long"))
    rows = result.collect()
    # Pin the row count before indexing: a dropped row would otherwise show up
    # as an IndexError, and an extra row would go completely unnoticed.
    assert len(rows) == 2
    assert rows[0]["n"] == 9999999999
    assert rows[1]["n"] is None