Struct spark_connect_rs::readwriter::DataFrameWriter
pub struct DataFrameWriter { /* private fields */ }
DataFrameWriter provides the ability to output a DataFrame to a specific file format supported by Spark
Implementations§
impl DataFrameWriter
pub fn new(dataframe: DataFrame) -> Self
Create a new DataFrameWriter from a provided DataFrame
§Defaults
format: None
mode: SaveMode::Overwrite
bucket_by: None
partition_by: vec![]
sort_by: vec![]
write_options: HashMap::new()
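A minimal sketch of obtaining a writer that starts from these defaults, assuming the same session setup as the repository example below; df.write() is taken here to be the convenience entry point equivalent to DataFrameWriter::new, and the output path is purely illustrative:

use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;
    let df = spark.clone().range(None, 100, 1, Some(4));

    // the writer begins with the defaults listed above: SaveMode::Overwrite,
    // no format, no bucketing or partitioning, and an empty option map
    let writer = df.write();

    // it can then be configured and saved exactly as in the example below
    writer.format("parquet").save("/tmp/range_ids").await?; // illustrative path

    Ok(())
}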
pub fn format(self, format: &str) -> Self
Target format to output the DataFrame
Example found in repository:
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let spark: SparkSession = SparkSessionBuilder::default().build().await?;
let df = spark
.clone()
.range(None, 1000, 1, Some(16))
.select(col("id").alias("range_id"));
let path = "/opt/spark/examples/src/main/rust/employees/";
df.write()
.format("csv")
.option("header", "true")
.save(path)
.await?;
let mut df = spark
.clone()
.read()
.format("csv")
.option("header", "true")
.load(vec![path.to_string()]);
df.show(Some(10), None, None).await?;
// printed results may vary slightly but should be close to the below
// +--------------------------+
// | show_string |
// +--------------------------+
// | +--------+ |
// | |range_id| |
// | +--------+ |
// | |312 | |
// | |313 | |
// | |314 | |
// | |315 | |
// | |316 | |
// | |317 | |
// | |318 | |
// | |319 | |
// | |320 | |
// | |321 | |
// | +--------+ |
// | only showing top 10 rows |
// | |
// +--------------------------+
Ok(())
}
pub fn bucketBy(self, num_buckets: i32, buckets: Vec<String>) -> Self
Buckets the output by the given columns. If specified, the output is laid out on the file system similar to Hive’s bucketing scheme.
pub fn sortBy(self, cols: Vec<String>) -> Self
Sorts the output in each bucket by the given columns on the file system
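A hedged sketch combining bucketBy and sortBy, reusing the session setup from the example above; in Spark, bucketed output is normally written with saveAsTable rather than to a plain path, and the table name spark_range_bucketed is only an illustration:

use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;
    let df = spark.clone().range(None, 1000, 1, Some(16));

    // 8 buckets hashed on `id`, with rows sorted by `id` inside each bucket
    df.write()
        .format("parquet")
        .bucketBy(8, vec!["id".to_string()])
        .sortBy(vec!["id".to_string()])
        .saveAsTable("spark_range_bucketed")
        .await?;

    Ok(())
}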
pub fn partitionBy(self, cols: Vec<String>) -> Self
Partitions the output by the given columns on the file system
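A short sketch of partitioned output, again assuming the session setup from the example above; the small range keeps the number of partition directories manageable and the /tmp path is illustrative:

use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;
    let df = spark.clone().range(None, 5, 1, Some(1));

    // one id=<value>/ sub-directory is created per distinct value of the partition column
    df.write()
        .format("parquet")
        .partitionBy(vec!["id".to_string()])
        .save("/tmp/range_partitioned")
        .await?;

    Ok(())
}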
pub fn option(self, key: &str, value: &str) -> Self
Add an input option for the underlying data source
pub fn options(self, options: HashMap<String, String>) -> Self
Set many input options based on a HashMap for the underlying data source
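A sketch of supplying several options at once from a HashMap, assuming they are merged into the same option map that repeated option calls build up; header and delimiter are standard Spark CSV writer options and the path is illustrative:

use std::collections::HashMap;

use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;
    let df = spark.clone().range(None, 100, 1, Some(4));

    // equivalent to chaining .option() once per key
    let opts = HashMap::from([
        ("header".to_string(), "true".to_string()),
        ("delimiter".to_string(), ";".to_string()),
    ]);

    df.write()
        .format("csv")
        .options(opts)
        .save("/tmp/range_csv_options")
        .await?;

    Ok(())
}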
pub async fn save(&mut self, path: &str) -> Result<(), ArrowError>
Save the contents of the DataFrame to a data source.
The data source is specified by the format and a set of options.
pub async fn saveAsTable(&mut self, table_name: &str) -> Result<(), ArrowError>
Saves the contents of the DataFrame as the specified table.
pub async fn insertInto(&mut self, table_name: &str) -> Result<(), ArrowError>
Inserts the contents of the DataFrame into the specified table.
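A hedged sketch contrasting the two table sinks, with an illustrative table name: saveAsTable creates (or, under the default SaveMode::Overwrite, replaces) the table from the DataFrame, while insertInto writes into a table that must already exist, matching columns by position:

use spark_connect_rs::{SparkSession, SparkSessionBuilder};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;

    // create the table from one DataFrame ...
    let df = spark.clone().range(None, 100, 1, Some(4));
    df.write().saveAsTable("range_table").await?;

    // ... then insert a second DataFrame into the now-existing table;
    // its schema has to line up with the table by column position
    let more = spark.clone().range(None, 50, 1, Some(4));
    more.write().insertInto("range_table").await?;

    Ok(())
}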
Auto Trait Implementations§
impl !RefUnwindSafe for DataFrameWriter
impl Send for DataFrameWriter
impl Sync for DataFrameWriter
impl Unpin for DataFrameWriter
impl !UnwindSafe for DataFrameWriter
Blanket Implementations§
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoRequest<T> for T
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request