Struct spark_connect_rs::column::Column
source · pub struct Column {
pub expression: Expression,
}
Expand description
§Column
A column holds a specific spark::Expression which will be resolved once an action is called. The columns are resolved by the Spark Connect server of the remote session.
A column instance can be created in a similar way to the Spark API. A column created
with col("*")
or col("name.*")
is created as an unresolved star attribute which will select
all columns or references in the specified column.
use spark_connect_rs::{SparkSession, SparkSessionBuilder};
let spark: SparkSession = SparkSessionBuilder::remote("sc://127.0.0.1:15002/;user_id=example_rs".to_string())
.build()
.await?;
// As a &str representing an unresolved column in the dataframe
spark.range(None, 1, 1, Some(1)).select("id");
// By using the `col` function
spark.range(None, 1, 1, Some(1)).select(col("id"));
// By using the `lit` function to return a literal value
spark.range(None, 1, 1, Some(1)).select(lit(4.0).alias("num_col"));
Fields§
§expression: Expression
a spark::Expression containing any unresolved value to be leveraged in a spark::Plan
Implementations§
source§impl Column
impl Column
sourcepub fn alias(self, value: &str) -> Column
pub fn alias(self, value: &str) -> Column
Returns the column with a new name
§Example:
let cols = [
col("name").alias("new_name"),
col("age").alias("new_age")
];
df.select(cols);
Examples found in repository?
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let spark: SparkSession = SparkSessionBuilder::default().build().await?;
let path = ["/opt/spark/examples/src/main/resources/people.csv"];
let df = spark
.read()
.format("csv")
.option("header", "True")
.option("delimiter", ";")
.load(path)?;
df.select([
F::col("name"),
F::col("age").cast("int").alias("age_int"),
(F::lit(3.0) + F::col("age").cast("int")).alias("addition"),
])
.sort(vec![F::col("name").desc()])
.show(Some(5), None, None)
.await?;
// print results
// +--------------------------+
// | show_string |
// +--------------------------+
// | +-----+-------+--------+ |
// | |name |age_int|addition| |
// | +-----+-------+--------+ |
// | |Jorge|30 |33.0 | |
// | |Bob |32 |35.0 | |
// | +-----+-------+--------+ |
// | |
// +--------------------------+
Ok(())
}
More examples
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let spark: SparkSession = SparkSessionBuilder::remote("sc://127.0.0.1:15002/")
.build()
.await?;
let df = spark
.clone()
.range(None, 1000, 1, Some(16))
.select(col("id").alias("range_id"));
let path = "/opt/spark/examples/src/main/rust/employees/";
df.write()
.format("csv")
.mode(SaveMode::Overwrite)
.option("header", "true")
.save(path)
.await?;
let df = spark
.clone()
.read()
.format("csv")
.option("header", "true")
.load([path])?;
df.show(Some(10), None, None).await?;
// print results may slightly vary but should be close to the below
// +--------------------------+
// | show_string |
// +--------------------------+
// | +--------+ |
// | |range_id| |
// | +--------+ |
// | |312 | |
// | |313 | |
// | |314 | |
// | |315 | |
// | |316 | |
// | |317 | |
// | |318 | |
// | |319 | |
// | |320 | |
// | |321 | |
// | +--------+ |
// | only showing top 10 rows |
// | |
// +--------------------------+
Ok(())
}
sourcepub fn asc(self) -> Column
pub fn asc(self) -> Column
Returns a sorted expression based on the ascending order of the column
§Example:
let df: DataFrame = df.sort(col("id").asc());
let df: DataFrame = df.sort(asc(col("id")));
pub fn asc_nulls_first(self) -> Column
pub fn asc_nulls_last(self) -> Column
sourcepub fn desc(self) -> Column
pub fn desc(self) -> Column
Returns a sorted expression based on the descending order of the column
§Example:
let df: DataFrame = df.sort(col("id").desc());
let df: DataFrame = df.sort(desc(col("id")));
Examples found in repository?
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let spark: SparkSession = SparkSessionBuilder::default().build().await?;
let path = ["/opt/spark/examples/src/main/resources/people.csv"];
let df = spark
.read()
.format("csv")
.option("header", "True")
.option("delimiter", ";")
.load(path)?;
df.select([
F::col("name"),
F::col("age").cast("int").alias("age_int"),
(F::lit(3.0) + F::col("age").cast("int")).alias("addition"),
])
.sort(vec![F::col("name").desc()])
.show(Some(5), None, None)
.await?;
// print results
// +--------------------------+
// | show_string |
// +--------------------------+
// | +-----+-------+--------+ |
// | |name |age_int|addition| |
// | +-----+-------+--------+ |
// | |Jorge|30 |33.0 | |
// | |Bob |32 |35.0 | |
// | +-----+-------+--------+ |
// | |
// +--------------------------+
Ok(())
}
pub fn desc_nulls_first(self) -> Column
pub fn desc_nulls_last(self) -> Column
sourcepub fn cast(self, to_type: &str) -> Column
pub fn cast(self, to_type: &str) -> Column
Casts the column into the Spark type represented as a &str
§Arguments:
to_type
is the string representation of the datatype
§Example:
let df = df.select([
col("age").cast("int"),
col("name").cast("string")
])
Examples found in repository?
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let spark: SparkSession = SparkSessionBuilder::default().build().await?;
let path = ["/opt/spark/examples/src/main/resources/people.csv"];
let df = spark
.read()
.format("csv")
.option("header", "True")
.option("delimiter", ";")
.load(path)?;
df.select([
F::col("name"),
F::col("age").cast("int").alias("age_int"),
(F::lit(3.0) + F::col("age").cast("int")).alias("addition"),
])
.sort(vec![F::col("name").desc()])
.show(Some(5), None, None)
.await?;
// print results
// +--------------------------+
// | show_string |
// +--------------------------+
// | +-----+-------+--------+ |
// | |name |age_int|addition| |
// | +-----+-------+--------+ |
// | |Jorge|30 |33.0 | |
// | |Bob |32 |35.0 | |
// | +-----+-------+--------+ |
// | |
// +--------------------------+
Ok(())
}
sourcepub fn isin<T>(self, cols: Vec<T>) -> Columnwhere
T: ToLiteralExpr,
pub fn isin<T>(self, cols: Vec<T>) -> Columnwhere
T: ToLiteralExpr,
A boolean expression that is evaluated to true
if the value of the expression is
contained by the evaluated values of the arguments
§Arguments:
cols
a value that implements the ToLiteralExpr trait
§Example:
df.filter(col("name").isin(vec!["Jorge", "Bob"]));
sourcepub fn contains<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
pub fn contains<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
A boolean expression that is evaluated to true
if the value is in the Column
§Arguments:
other
: a value that implements the ToLiteralExpr trait, evaluated as a literal for the match
§Example:
df.filter(col("name").contains("ge"));
sourcepub fn startswith<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
pub fn startswith<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
A filter expression that evaluates if the column starts with a string literal
sourcepub fn endswith<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
pub fn endswith<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
A filter expression that evaluates if the column ends with a string literal
sourcepub fn like<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
pub fn like<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
A SQL LIKE filter expression that evaluates the column based on a case sensitive match
sourcepub fn ilike<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
pub fn ilike<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
A SQL ILIKE filter expression that evaluates the column based on a case insensitive match
sourcepub fn rlike<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
pub fn rlike<T>(self, other: T) -> Columnwhere
T: ToLiteralExpr,
A SQL RLIKE filter expression that evaluates the column based on a regex match
sourcepub fn eq<T>(self, other: T) -> Columnwhere
T: ToExpr,
pub fn eq<T>(self, other: T) -> Columnwhere
T: ToExpr,
Equality comparison. Cannot overload the ‘==’ and return something other than a bool
sourcepub fn and<T>(self, other: T) -> Columnwhere
T: ToExpr,
pub fn and<T>(self, other: T) -> Columnwhere
T: ToExpr,
Logical AND comparison. Cannot overload the ‘&&’ and return something other than a bool
sourcepub fn isNull(self) -> Column
pub fn isNull(self) -> Column
A filter expression that evaluates to true if the expression is null
sourcepub fn isNotNull(self) -> Column
pub fn isNotNull(self) -> Column
A filter expression that evaluates to true if the expression is NOT null
pub fn isNaN(self) -> Column
sourcepub fn over(self, window: WindowSpec) -> Column
pub fn over(self, window: WindowSpec) -> Column
Defines a windowing column
§Arguments:
window
: a WindowSpec
§Example
let window = Window::new()
.partitionBy(col("name"))
.orderBy([col("age")])
.rangeBetween(Window::unboundedPreceding(), Window::currentRow());
let df = df.withColumn("rank", rank().over(window.clone()))
.withColumn("min", min("age").over(window));
Trait Implementations§
source§impl From<Expression> for Column
impl From<Expression> for Column
source§fn from(expression: Expression) -> Column
fn from(expression: Expression) -> Column
Used for creating columns from a spark::Expression
source§impl ToFilterExpr for Column
impl ToFilterExpr for Column
fn to_filter_expr(&self) -> Option<Expression>
source§impl ToLiteralExpr for Column
impl ToLiteralExpr for Column
fn to_literal_expr(&self) -> Expression
Auto Trait Implementations§
impl Freeze for Column
impl RefUnwindSafe for Column
impl Send for Column
impl Sync for Column
impl Unpin for Column
impl UnwindSafe for Column
Blanket Implementations§
source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> Instrument for T
impl<T> Instrument for T
source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
T
in a tonic::Request