Struct spark_connect_rs::dataframe::dataframe::DataFrameWriter
pub struct DataFrameWriter { /* private fields */ }
DataFrameWriter provides the ability to output a DataFrame to a specific file format supported by Spark.
Implementations
impl DataFrameWriter
pub fn new(dataframe: DataFrame) -> Self
Create a new DataFrameWriter from a provided DataFrame
Defaults:
format: None
mode: SaveMode::Overwrite
bucket_by: None
partition_by: vec![]
sort_by: vec![]
write_options: HashMap::new()
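A minimal sketch of constructing a writer with these defaults; it assumes DataFrame::write() (used in the examples below) is a thin wrapper around this constructor:

// Sketch only: `df` is an existing DataFrame; `df.write()` is assumed
// to be equivalent to this explicit construction.
let writer = DataFrameWriter::new(df);
// With `format` left as None, Spark falls back to its configured default
// source (typically parquet), and SaveMode::Overwrite replaces any
// existing output at the target path.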
pub fn format(self, format: &str) -> Self
Target format to output the DataFrame
Examples found in repository
use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;

    let df = spark
        .clone()
        .range(None, 1000, 1, Some(16))
        .selectExpr(vec!["id AS range_id"]);

    let path = "/opt/spark/examples/src/main/rust/employees/";

    df.write()
        .format("csv")
        .option("header", "true")
        .save(path)
        .await?;

    let mut df = spark
        .clone()
        .read()
        .format("csv")
        .option("header", "true")
        .load(vec![path.to_string()]);

    df.show(Some(10), None, None).await?;

    // printed results may vary slightly but should be close to the below
    // +--------------------------+
    // |       show_string        |
    // +--------------------------+
    // | +--------+               |
    // | |range_id|               |
    // | +--------+               |
    // | |312     |               |
    // | |313     |               |
    // | |314     |               |
    // | |315     |               |
    // | |316     |               |
    // | |317     |               |
    // | |318     |               |
    // | |319     |               |
    // | |320     |               |
    // | |321     |               |
    // | +--------+               |
    // | only showing top 10 rows |
    // |                          |
    // +--------------------------+

    Ok(())
}
pub fn mode(self, mode: &str) -> Self
Specifies the behavior when data or table already exists
Arguments:
mode: (&str) translates to a specific SaveMode from the protobuf
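A short sketch; the accepted strings are an assumption here, mirroring the protobuf SaveMode variants and PySpark's DataFrameWriter.mode:

// Assumed accepted values, matching the protobuf SaveMode variants:
// "append", "overwrite", "ignore", "error" (a.k.a. "errorifexists").
df.write()
    .format("parquet")
    .mode("append") // add new files rather than replacing existing output
    .save("/tmp/ids") // hypothetical output path
    .await?;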
pub fn bucketBy(self, num_buckets: i32, buckets: Vec<String>) -> Self
Buckets the output by the given columns. If specified, the output is laid out on the file system similar to Hive’s bucketing scheme (see the combined sketch after sortBy below).
pub fn sortBy(self, cols: Vec<String>) -> Self
Sorts the output in each bucket by the given columns on the file system
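A combined sketch of bucketBy and sortBy (the range_id column comes from the example above). Note that Spark generally only honors bucketing for table-based saves, so take this as an illustration of the builder calls rather than a guaranteed layout when saving to a plain path:

let writer = df.write()
    .format("parquet")
    .bucketBy(4, vec!["range_id".to_string()]) // hash rows into 4 buckets
    .sortBy(vec!["range_id".to_string()]);     // sort rows within each bucket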
pub fn partitionBy(self, cols: Vec<String>) -> Self
Partitions the output by the given columns on the file system
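A short sketch, again reusing the range_id column; the output path is hypothetical. Each distinct value of the partition column becomes its own subdirectory:

// Produces one directory per value: .../range_id=0/, .../range_id=1/, ...
df.write()
    .format("parquet")
    .partitionBy(vec!["range_id".to_string()])
    .save("/tmp/partitioned_ids") // hypothetical output path
    .await?;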
pub fn option(self, key: &str, value: &str) -> Self
Add an output option for the underlying data source
Examples found in repository: see the round-trip CSV example under format above, which sets option("header", "true") on both the writer and the reader.
pub fn options(self, options: HashMap<String, String>) -> Self
Set multiple output options at once from a HashMap for the underlying data source
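A sketch of setting several options at once; the keys and values are assumed to be the same strings that option accepts individually (CSV options here):

use std::collections::HashMap;

let opts = HashMap::from([
    ("header".to_string(), "true".to_string()),
    ("delimiter".to_string(), ";".to_string()),
]);

df.write()
    .format("csv")
    .options(opts)
    .save("/tmp/ids_csv") // hypothetical output path
    .await?;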
pub async fn save(&mut self, path: &str) -> Result<(), ArrowError>
Save the contents of the DataFrame to a data source.
The data source is specified by the format and a set of options.
Examples found in repository: see the round-trip CSV example under format above, which writes with save(path).await? before reading the files back.
Auto Trait Implementations
impl !RefUnwindSafe for DataFrameWriter
impl Send for DataFrameWriter
impl Sync for DataFrameWriter
impl Unpin for DataFrameWriter
impl !UnwindSafe for DataFrameWriter
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoRequest<T> for T
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request