# robin-sparkless 4.0.0
#
# PySpark-like DataFrame API in Rust on Polars; no JVM.
# Documentation
"""
Tests for issue #273: to_timestamp() on string column (PySpark parity).

PySpark to_timestamp(col("ts_str")) parses common formats like "2024-01-01 10:00:00".
With format, PySpark parses e.g. "2024-01-01T10:00:00" with "yyyy-MM-dd'T'HH:mm:ss".
Robin previously failed without format or returned None with format.
"""

from __future__ import annotations


from tests.fixtures.spark_imports import get_spark_imports

# Obtain the Spark-style imports through the shared fixture helper instead of
# importing a concrete package here.
# NOTE(review): presumably this lets the same tests run against either PySpark
# or Robin — confirm in tests/fixtures/spark_imports.
_imports = get_spark_imports()
# `F` is the functions namespace the tests below call (F.col, F.to_timestamp).
F = _imports.F


def test_to_timestamp_string_no_format(spark) -> None:
    """Check that to_timestamp() with no pattern parses 'YYYY-MM-DD HH:MM:SS'.

    Builds a one-row frame holding the string "2024-01-01 10:00:00", derives a
    "ts" column via F.to_timestamp(F.col("ts_str")), and verifies the collected
    value is non-null and renders with the expected date and time parts.
    """
    source = spark.createDataFrame(
        [{"ts_str": "2024-01-01 10:00:00"}],
        ["ts_str"],
    )
    with_ts = source.withColumn("ts", F.to_timestamp(F.col("ts_str")))
    collected = with_ts.collect()
    assert len(collected) == 1

    parsed = collected[0]["ts"]
    assert parsed is not None

    # Compare on the string rendering so the check does not depend on the
    # concrete timestamp type the backend returns.
    rendered = str(parsed)
    assert "2024-01-01" in rendered
    assert "10:00:00" in rendered


def test_to_timestamp_string_with_format(spark) -> None:
    """Check that to_timestamp() with an explicit pattern parses ISO-like input.

    Builds a one-row frame holding "2024-01-01T10:00:00", parses it with the
    pattern yyyy-MM-dd'T'HH:mm:ss into a "ts" column, and verifies the collected
    value is non-null and renders with the expected date and time parts.
    """
    pattern = "yyyy-MM-dd'T'HH:mm:ss"
    source = spark.createDataFrame(
        [{"ts_str": "2024-01-01T10:00:00"}],
        ["ts_str"],
    )
    ts_expr = F.to_timestamp(F.col("ts_str"), pattern)
    collected = source.withColumn("ts", ts_expr).collect()
    assert len(collected) == 1

    parsed = collected[0]["ts"]
    assert parsed is not None

    # Compare on the string rendering so the check does not depend on the
    # concrete timestamp type the backend returns.
    rendered = str(parsed)
    assert "2024-01-01" in rendered
    assert "10:00:00" in rendered