robin-sparkless 4.4.0

PySpark-like DataFrame API in Rust on Polars; no JVM.
Documentation
{
  "name": "left_join",
  "pyspark_version": "3.5.0",
  "input": {
    "schema": [
      { "name": "dept_id", "type": "bigint" },
      { "name": "id", "type": "bigint" },
      { "name": "name", "type": "string" },
      { "name": "salary", "type": "bigint" }
    ],
    "rows": [
      [10, 1, "Alice", 50000],
      [20, 2, "Bob", 60000],
      [10, 3, "Charlie", 70000],
      [30, 4, "David", 55000]
    ]
  },
  "right_input": {
    "schema": [
      { "name": "dept_id", "type": "bigint" },
      { "name": "location", "type": "string" },
      { "name": "name", "type": "string" }
    ],
    "rows": [
      [10, "NYC", "IT"],
      [20, "LA", "HR"],
      [40, "Chicago", "Finance"]
    ]
  },
  "operations": [
    { "op": "join", "on": ["dept_id"], "how": "left" },
    { "op": "orderBy", "columns": ["id"], "ascending": [true] }
  ],
  "expected": {
    "schema": [
      { "name": "dept_id", "type": "bigint" },
      { "name": "id", "type": "bigint" },
      { "name": "name", "type": "string" },
      { "name": "salary", "type": "bigint" },
      { "name": "location", "type": "string" },
      { "name": "name", "type": "string" }
    ],
    "rows": [
      [10, 1, "Alice", 50000, "NYC", "IT"],
      [20, 2, "Bob", 60000, "LA", "HR"],
      [10, 3, "Charlie", 70000, "NYC", "IT"],
      [30, 4, "David", 55000, null, null]
    ]
  }
}