Struct spark_connect_rs::dataframe::dataframe::DataFrameWriter
pub struct DataFrameWriter { /* private fields */ }
DataFrameWriter provides the ability to output a DataFrame to a specific file format supported by Spark.
Implementations
impl DataFrameWriter
pub fn new(dataframe: DataFrame) -> Self
Create a new DataFrameWriter from a provided DataFrame
Defaults:
format: None
mode: SaveMode::Overwrite
bucket_by: None
partition_by: vec![]
sort_by: vec![]
write_options: HashMap::new()
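A minimal sketch of constructing a writer with these defaults; it assumes DataFrame::write() (used in the examples below) is a thin wrapper around this constructor:

// Sketch only: `df` is an existing DataFrame; `df.write()` is assumed
// to be equivalent to this explicit construction.
let writer = DataFrameWriter::new(df);
// With `format` left as None, Spark falls back to its configured default
// source (typically parquet), and SaveMode::Overwrite replaces any
// existing output at the target path.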
pub fn format(self, format: &str) -> Self
Target format to output the DataFrame
Examples found in repository
use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;

    let df = spark
        .clone()
        .range(None, 1000, 1, Some(16))
        .selectExpr(vec!["id AS range_id"]);

    let path = "/opt/spark/examples/src/main/rust/employees/";

    df.write()
        .format("csv")
        .option("header", "true")
        .save(path)
        .await?;

    let mut df = spark
        .clone()
        .read()
        .format("csv")
        .option("header", "true")
        .load(vec![path.to_string()]);

    df.show(Some(10), None, None).await?;

    // printed results may vary slightly but should be close to the below
    // +--------------------------+
    // |       show_string        |
    // +--------------------------+
    // | +--------+               |
    // | |range_id|               |
    // | +--------+               |
    // | |312     |               |
    // | |313     |               |
    // | |314     |               |
    // | |315     |               |
    // | |316     |               |
    // | |317     |               |
    // | |318     |               |
    // | |319     |               |
    // | |320     |               |
    // | |321     |               |
    // | +--------+               |
    // | only showing top 10 rows |
    // |                          |
    // +--------------------------+

    Ok(())
}
pub fn mode(self, mode: &str) -> Self
Specifies the behavior when data or table already exists
Arguments:
mode: (&str) translates to a specific SaveMode from the protobuf
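A short sketch; the accepted strings are an assumption here, mirroring the protobuf SaveMode variants and PySpark's DataFrameWriter.mode:

// Assumed accepted values, matching the protobuf SaveMode variants:
// "append", "overwrite", "ignore", "error" (a.k.a. "errorifexists").
df.write()
    .format("parquet")
    .mode("append") // add new files rather than replacing existing output
    .save("/tmp/ids") // hypothetical output path
    .await?;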
pub fn bucketBy(self, num_buckets: i32, buckets: Vec<String>) -> Self
Buckets the output by the given columns. If specified, the output is laid out on the file system similar to Hive’s bucketing scheme (see the combined sketch after sortBy below).
pub fn sortBy(self, cols: Vec<String>) -> Self
Sorts the output in each bucket by the given columns on the file system
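A combined sketch of bucketBy and sortBy (the range_id column comes from the example above). Note that Spark generally only honors bucketing for table-based saves, so take this as an illustration of the builder calls rather than a guaranteed layout when saving to a plain path:

let writer = df.write()
    .format("parquet")
    .bucketBy(4, vec!["range_id".to_string()]) // hash rows into 4 buckets
    .sortBy(vec!["range_id".to_string()]);     // sort rows within each bucket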
pub fn partitionBy(self, cols: Vec<String>) -> Self
Partitions the output by the given columns on the file system
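A short sketch, again reusing the range_id column; the output path is hypothetical. Each distinct value of the partition column becomes its own subdirectory:

// Produces one directory per value: .../range_id=0/, .../range_id=1/, ...
df.write()
    .format("parquet")
    .partitionBy(vec!["range_id".to_string()])
    .save("/tmp/partitioned_ids") // hypothetical output path
    .await?;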
pub fn option(self, key: &str, value: &str) -> Self
Add an output option for the underlying data source
Examples found in repository: see the round-trip CSV example under format above, which sets option("header", "true") on both the writer and the reader.
pub fn options(self, options: HashMap<String, String>) -> Self
Set multiple output options at once from a HashMap for the underlying data source
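A sketch of setting several options at once; the keys and values are assumed to be the same strings that option accepts individually (CSV options here):

use std::collections::HashMap;

let opts = HashMap::from([
    ("header".to_string(), "true".to_string()),
    ("delimiter".to_string(), ";".to_string()),
]);

df.write()
    .format("csv")
    .options(opts)
    .save("/tmp/ids_csv") // hypothetical output path
    .await?;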
pub async fn save(&mut self, path: &str) -> Result<(), ArrowError>
Save the contents of the DataFrame to a data source.
The data source is specified by the format and a set of options.
Examples found in repository: see the round-trip CSV example under format above, which writes with save(path).await? before reading the files back.
Auto Trait Implementations
impl !RefUnwindSafe for DataFrameWriter
impl Send for DataFrameWriter
impl Sync for DataFrameWriter
impl Unpin for DataFrameWriter
impl !UnwindSafe for DataFrameWriter
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoRequest<T> for T
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request