{
"test_id": "string_split",
"pyspark_version": "3.5",
"generated_at": "2025-10-27T14:13:11.974645",
"input_data": [
{
"id": 1,
"name": "Alice",
"age": 25,
"salary": 50000.0,
"active": true,
"email": "alice@example.com"
},
{
"id": 2,
"name": "Bob",
"age": 30,
"salary": 60000.0,
"active": false,
"email": "bob@test.com"
},
{
"id": 3,
"name": "Charlie",
"age": 35,
"salary": 70000.0,
"active": true,
"email": "charlie@company.org"
}
],
"operation": "DataFrame operation: string_split",
"expected_output": {
"schema": {
"field_count": 1,
"field_names": [
"split(email, @, -1)"
],
"field_types": [
"array"
],
"fields": [
{
"name": "split(email, @, -1)",
"type": "array",
"nullable": true
}
]
},
"data": [
{
"split(email, @, -1)": [
"alice",
"example.com"
]
},
{
"split(email, @, -1)": [
"bob",
"test.com"
]
},
{
"split(email, @, -1)": [
"charlie",
"company.org"
]
}
],
"row_count": 3
}
}