Struct spark_connect_rs::column::Column

source ·
pub struct Column {
    pub expression: Expression,
}
Expand description

§Column

A column holds a specific spark::Expression which will be resolved once an action is called. The columns are resolved by the Spark Connect server of the remote session.

A column instance can be created in a similar way to the Spark API. A column created with col("*") or col("name.*") is created as an unresolved star attribute which will select all columns or references in the specified column.

use spark_connect_rs::{SparkSession, SparkSessionBuilder};

let spark: SparkSession = SparkSessionBuilder::remote("sc://127.0.0.1:15002/;user_id=example_rs".to_string())
        .build()
        .await?;

// As a &str representing an unresolved column in the dataframe
spark.range(None, 1, 1, Some(1)).select("id");

// By using the `col` function
spark.range(None, 1, 1, Some(1)).select(col("id"));

// By using the `lit` function to return a literal value
spark.range(None, 1, 1, Some(1)).select(lit(4.0).alias("num_col"));

Fields§

§expression: Expression

a spark::Expression containing any unresolved value to be leveraged in a spark::Plan

Implementations§

source§

impl Column

source

pub fn alias(self, value: &str) -> Column

Returns the column with a new name

§Example:
let cols = [
    col("name").alias("new_name"),
    col("age").alias("new_age")
];

df.select(cols);
Examples found in repository?
examples/reader.rs (line 24)
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;

    let path = ["/opt/spark/examples/src/main/resources/people.csv"];

    let df = spark
        .read()
        .format("csv")
        .option("header", "True")
        .option("delimiter", ";")
        .load(path)?;

    df.select([
        F::col("name"),
        F::col("age").cast("int").alias("age_int"),
        (F::lit(3.0) + F::col("age").cast("int")).alias("addition"),
    ])
    .sort(vec![F::col("name").desc()])
    .show(Some(5), None, None)
    .await?;

    // print results
    // +--------------------------+
    // | show_string              |
    // +--------------------------+
    // | +-----+-------+--------+ |
    // | |name |age_int|addition| |
    // | +-----+-------+--------+ |
    // | |Jorge|30     |33.0    | |
    // | |Bob  |32     |35.0    | |
    // | +-----+-------+--------+ |
    // |                          |
    // +--------------------------+

    Ok(())
}
More examples
Hide additional examples
examples/writer.rs (line 21)
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::remote("sc://127.0.0.1:15002/")
        .build()
        .await?;

    let df = spark
        .clone()
        .range(None, 1000, 1, Some(16))
        .select(col("id").alias("range_id"));

    let path = "/opt/spark/examples/src/main/rust/employees/";

    df.write()
        .format("csv")
        .mode(SaveMode::Overwrite)
        .option("header", "true")
        .save(path)
        .await?;

    let df = spark
        .clone()
        .read()
        .format("csv")
        .option("header", "true")
        .load([path])?;

    df.show(Some(10), None, None).await?;

    // printed results may slightly vary but should be close to the below
    // +--------------------------+
    // | show_string              |
    // +--------------------------+
    // | +--------+               |
    // | |range_id|               |
    // | +--------+               |
    // | |312     |               |
    // | |313     |               |
    // | |314     |               |
    // | |315     |               |
    // | |316     |               |
    // | |317     |               |
    // | |318     |               |
    // | |319     |               |
    // | |320     |               |
    // | |321     |               |
    // | +--------+               |
    // | only showing top 10 rows |
    // |                          |
    // +--------------------------+

    Ok(())
}
source

pub fn name(self, value: &str) -> Column

An alias for the function alias

source

pub fn asc(self) -> Column

Returns a sorted expression based on the ascending order of the column

§Example:
let df: DataFrame = df.sort(col("id").asc());

let df: DataFrame = df.sort(asc(col("id")));
source

pub fn asc_nulls_first(self) -> Column

source

pub fn asc_nulls_last(self) -> Column

source

pub fn desc(self) -> Column

Returns a sorted expression based on the descending order of the column

§Example:
let df: DataFrame = df.sort(col("id").desc());

let df: DataFrame = df.sort(desc(col("id")));
Examples found in repository?
examples/reader.rs (line 27)
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;

    let path = ["/opt/spark/examples/src/main/resources/people.csv"];

    let df = spark
        .read()
        .format("csv")
        .option("header", "True")
        .option("delimiter", ";")
        .load(path)?;

    df.select([
        F::col("name"),
        F::col("age").cast("int").alias("age_int"),
        (F::lit(3.0) + F::col("age").cast("int")).alias("addition"),
    ])
    .sort(vec![F::col("name").desc()])
    .show(Some(5), None, None)
    .await?;

    // print results
    // +--------------------------+
    // | show_string              |
    // +--------------------------+
    // | +-----+-------+--------+ |
    // | |name |age_int|addition| |
    // | +-----+-------+--------+ |
    // | |Jorge|30     |33.0    | |
    // | |Bob  |32     |35.0    | |
    // | +-----+-------+--------+ |
    // |                          |
    // +--------------------------+

    Ok(())
}
source

pub fn desc_nulls_first(self) -> Column

source

pub fn desc_nulls_last(self) -> Column

source

pub fn cast(self, to_type: &str) -> Column

Casts the column into the Spark type represented as a &str

§Arguments:
  • to_type is the string representation of the datatype
§Example:
let df = df.select([
      col("age").cast("int"),
      col("name").cast("string")
    ])
Examples found in repository?
examples/reader.rs (line 24)
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let spark: SparkSession = SparkSessionBuilder::default().build().await?;

    let path = ["/opt/spark/examples/src/main/resources/people.csv"];

    let df = spark
        .read()
        .format("csv")
        .option("header", "True")
        .option("delimiter", ";")
        .load(path)?;

    df.select([
        F::col("name"),
        F::col("age").cast("int").alias("age_int"),
        (F::lit(3.0) + F::col("age").cast("int")).alias("addition"),
    ])
    .sort(vec![F::col("name").desc()])
    .show(Some(5), None, None)
    .await?;

    // print results
    // +--------------------------+
    // | show_string              |
    // +--------------------------+
    // | +-----+-------+--------+ |
    // | |name |age_int|addition| |
    // | +-----+-------+--------+ |
    // | |Jorge|30     |33.0    | |
    // | |Bob  |32     |35.0    | |
    // | +-----+-------+--------+ |
    // |                          |
    // +--------------------------+

    Ok(())
}
source

pub fn isin<T>(self, cols: Vec<T>) -> Column
where T: ToLiteralExpr,

A boolean expression that is evaluated to true if the value of the expression is contained by the evaluated values of the arguments

§Arguments:
§Example:
df.filter(col("name").isin(["Jorge", "Bob"]));
source

pub fn contains<T>(self, other: T) -> Column
where T: ToLiteralExpr,

A boolean expression that is evaluated to true if the value is in the Column

§Arguments:
§Example:
df.filter(col("name").contains("ge"));
source

pub fn startswith<T>(self, other: T) -> Column
where T: ToLiteralExpr,

A filter expression that evaluates whether the column starts with a string literal

source

pub fn endswith<T>(self, other: T) -> Column
where T: ToLiteralExpr,

A filter expression that evaluates whether the column ends with a string literal

source

pub fn like<T>(self, other: T) -> Column
where T: ToLiteralExpr,

A SQL LIKE filter expression that evaluates the column based on a case sensitive match

source

pub fn ilike<T>(self, other: T) -> Column
where T: ToLiteralExpr,

A SQL ILIKE filter expression that evaluates the column based on a case insensitive match

source

pub fn rlike<T>(self, other: T) -> Column
where T: ToLiteralExpr,

A SQL RLIKE filter expression that evaluates the column based on a regex match

source

pub fn eq<T>(self, other: T) -> Column
where T: ToExpr,

Equality comparison. The ‘==’ operator cannot be overloaded to return something other than a bool

source

pub fn and<T>(self, other: T) -> Column
where T: ToExpr,

Logical AND comparison. The ‘&&’ operator cannot be overloaded to return something other than a bool

source

pub fn or<T>(self, other: T) -> Column
where T: ToExpr,

Logical OR comparison.

source

pub fn isNull(self) -> Column

A filter expression that evaluates to true if the expression is null

source

pub fn isNotNull(self) -> Column

A filter expression that evaluates to true if the expression is NOT null

source

pub fn isNaN(self) -> Column

source

pub fn over(self, window: WindowSpec) -> Column

Defines a windowing column

§Arguments:
§Example
let window = Window::new()
    .partitionBy(col("name"))
    .orderBy([col("age")])
    .rangeBetween(Window::unboundedPreceding(), Window::currentRow());

let df = df.withColumn("rank", rank().over(window.clone()))
    .withColumn("min", min("age").over(window));

Trait Implementations§

source§

impl Add for Column

§

type Output = Column

The resulting type after applying the + operator.
source§

fn add(self, other: Column) -> Column

Performs the + operation. Read more
source§

impl BitAnd for Column

§

type Output = Column

The resulting type after applying the & operator.
source§

fn bitand(self, other: Column) -> Column

Performs the & operation. Read more
source§

impl BitOr for Column

§

type Output = Column

The resulting type after applying the | operator.
source§

fn bitor(self, other: Column) -> Column

Performs the | operation. Read more
source§

impl BitXor for Column

§

type Output = Column

The resulting type after applying the ^ operator.
source§

fn bitxor(self, other: Column) -> Column

Performs the ^ operation. Read more
source§

impl Clone for Column

source§

fn clone(&self) -> Column

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Column

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
source§

impl Div for Column

§

type Output = Column

The resulting type after applying the / operator.
source§

fn div(self, other: Column) -> Column

Performs the / operation. Read more
source§

impl From<&str> for Column

source§

fn from(value: &str) -> Column

&str values containing a * will be created as an unresolved star expression. Otherwise, the value is created as an unresolved attribute

source§

impl From<Expression> for Column

source§

fn from(expression: Expression) -> Column

Used for creating columns from a spark::Expression

source§

impl Mul for Column

§

type Output = Column

The resulting type after applying the * operator.
source§

fn mul(self, other: Column) -> Column

Performs the * operation. Read more
source§

impl Neg for Column

§

type Output = Column

The resulting type after applying the - operator.
source§

fn neg(self) -> Column

Performs the unary - operation. Read more
source§

impl Not for Column

§

type Output = Column

The resulting type after applying the ! operator.
source§

fn not(self) -> <Column as Not>::Output

Performs the unary ! operation. Read more
source§

impl Rem for Column

§

type Output = Column

The resulting type after applying the % operator.
source§

fn rem(self, other: Column) -> Column

Performs the % operation. Read more
source§

impl Sub for Column

§

type Output = Column

The resulting type after applying the - operator.
source§

fn sub(self, other: Column) -> Column

Performs the - operation. Read more
source§

impl ToExpr for Column

source§

impl ToFilterExpr for Column

source§

impl ToLiteralExpr for Column

Auto Trait Implementations§

§

impl Freeze for Column

§

impl RefUnwindSafe for Column

§

impl Send for Column

§

impl Sync for Column

§

impl Unpin for Column

§

impl UnwindSafe for Column

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> FromRef<T> for T
where T: Clone,

source§

fn from_ref(input: &T) -> T

Converts to this type from a reference to the input type.
source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoRequest<T> for T

source§

fn into_request(self) -> Request<T>

Wrap the input message T in a tonic::Request
source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T> ToVecExpr for T
where T: ToExpr,

source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

source§

impl<T, Rhs, Output> NumOps<Rhs, Output> for T
where T: Sub<Rhs, Output = Output> + Mul<Rhs, Output = Output> + Div<Rhs, Output = Output> + Add<Rhs, Output = Output> + Rem<Rhs, Output = Output>,