use spark_connect_rs::{SparkSession, SparkSessionBuilder};
// This example demonstrates creating a Spark DataFrame with range(),
// renaming the column, writing the results out as CSV,
// and then reading the CSV files back into a DataFrame.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
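    // Build a Spark session against the builder's default Spark Connect endpoint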
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;
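    // Create a DataFrame of ids from the default start (0) up to 1000,
    // spread over 16 partitions, and rename the column to `range_id`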
    let df = spark
        .clone()
        .range(None, 1000, 1, Some(16))
        .selectExpr(vec!["id AS range_id"]);
    let path = "/opt/spark/examples/src/main/rust/employees/";
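    // Write the DataFrame to CSV files at `path`, including a header row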
    df.write()
        .format("csv")
        .option("header", "true")
        .save(path)
        .await?;
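    // Read the CSV files back, using the header row to name the column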
    let mut df = spark
        .clone()
        .read()
        .format("csv")
        .option("header", "true")
        .load(vec![path.to_string()]);
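    // Show the first 10 rows; row order may vary since the data is partitioned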
    df.show(Some(10), None, None).await?;
    // printed results may vary slightly but should be close to the below
    // +--------------------------+
    // | show_string              |
    // +--------------------------+
    // | +--------+               |
    // | |range_id|               |
    // | +--------+               |
    // | |312     |               |
    // | |313     |               |
    // | |314     |               |
    // | |315     |               |
    // | |316     |               |
    // | |317     |               |
    // | |318     |               |
    // | |319     |               |
    // | |320     |               |
    // | |321     |               |
    // | +--------+               |
    // | only showing top 10 rows |
    // |                          |
    // +--------------------------+
    Ok(())
}