pub struct Column { /* private fields */ }
Column - represents a column in a DataFrame, used for building expressions
Thin wrapper around Polars Expr. May carry a DeferredRandom for rand/randn so with_column can produce one value per row.
Implementations
impl Column
pub fn from_rand(seed: Option<u64>) -> Self
Create a Column for rand(seed). When used in with_column, generates one value per row (PySpark-like).
pub fn from_randn(seed: Option<u64>) -> Self
Create a Column for randn(seed). When used in with_column, generates one value per row (PySpark-like).
pub fn asc(&self) -> SortOrder
Ascending sort, nulls first (Spark default for ASC). PySpark asc.
pub fn asc_nulls_first(&self) -> SortOrder
Ascending sort, nulls first. PySpark asc_nulls_first.
pub fn asc_nulls_last(&self) -> SortOrder
Ascending sort, nulls last. PySpark asc_nulls_last.
pub fn desc(&self) -> SortOrder
Descending sort, nulls last (Spark default for DESC). PySpark desc.
pub fn desc_nulls_first(&self) -> SortOrder
Descending sort, nulls first. PySpark desc_nulls_first.
pub fn desc_nulls_last(&self) -> SortOrder
Descending sort, nulls last. PySpark desc_nulls_last.
pub fn is_not_null(&self) -> Column
Check if column is not null
pub fn like(&self, pattern: &str, escape_char: Option<char>) -> Column
SQL LIKE pattern matching (% = any chars, _ = one char). PySpark like. When escape_char is Some(esc), esc + char treats that char as literal (e.g. \% = literal %).
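The wildcard and escape rules can be sketched with a small standalone matcher. This is a plain-Rust illustration of the documented semantics, not the crate's Polars-based implementation:

```rust
// SQL LIKE semantics: '%' matches any run of characters, '_' exactly one.
// An escape char marks the following character as literal.
fn sql_like(s: &str, pattern: &str, escape: Option<char>) -> bool {
    // Tokenize the pattern into (char, is_literal) pairs, honoring the escape.
    let mut toks = Vec::new();
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if Some(c) == escape {
            if let Some(next) = chars.next() {
                toks.push((next, true)); // escaped: always literal
            }
        } else {
            toks.push((c, false));
        }
    }
    fn matches(s: &[char], p: &[(char, bool)]) -> bool {
        match p.split_first() {
            None => s.is_empty(),
            Some((&(c, literal), rest)) => {
                if !literal && c == '%' {
                    (0..=s.len()).any(|i| matches(&s[i..], rest))
                } else if !literal && c == '_' {
                    !s.is_empty() && matches(&s[1..], rest)
                } else {
                    s.first() == Some(&c) && matches(&s[1..], rest)
                }
            }
        }
    }
    matches(&s.chars().collect::<Vec<_>>(), &toks)
}

fn main() {
    assert!(sql_like("abc", "a%", None));
    assert!(sql_like("abc", "a_c", None));
    assert!(sql_like("50%", "50\\%", Some('\\'))); // escaped % is literal
    assert!(!sql_like("505", "50\\%", Some('\\')));
}
```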
pub fn ilike(&self, pattern: &str, escape_char: Option<char>) -> Column
Case-insensitive LIKE. PySpark ilike. When escape_char is Some(esc), esc + char treats that char as literal.
pub fn eq_pyspark(&self, other: &Column) -> Column
PySpark-style equality comparison (NULL == NULL returns NULL, not True). Any comparison involving NULL returns NULL.
Explicitly wraps comparisons with null checks to ensure PySpark semantics. If either side is NULL, the result is NULL.
pub fn ne_pyspark(&self, other: &Column) -> Column
PySpark-style inequality comparison (NULL != NULL returns NULL, not False). Any comparison involving NULL returns NULL.
pub fn eq_null_safe(&self, other: &Column) -> Column
Null-safe equality (NULL <=> NULL returns True). PySpark’s eqNullSafe() method.
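The contrast between the two equality flavors can be modeled with `Option` as a nullable value. A sketch of the documented three-valued semantics, not the crate's implementation:

```rust
// PySpark-style equality: any NULL operand makes the result NULL.
fn eq_pyspark(a: Option<i64>, b: Option<i64>) -> Option<bool> {
    match (a, b) {
        (Some(x), Some(y)) => Some(x == y), // both non-null: ordinary equality
        _ => None,                          // NULL involved: result is NULL
    }
}

// Null-safe equality: NULL <=> NULL is true, NULL <=> value is false.
fn eq_null_safe(a: Option<i64>, b: Option<i64>) -> bool {
    a == b
}

fn main() {
    assert_eq!(eq_pyspark(None, None), None);
    assert_eq!(eq_pyspark(Some(1), None), None);
    assert_eq!(eq_pyspark(Some(1), Some(1)), Some(true));
    assert!(eq_null_safe(None, None));
    assert!(!eq_null_safe(Some(1), None));
}
```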
pub fn gt_pyspark(&self, other: &Column) -> Column
PySpark-style greater-than comparison (NULL > value returns NULL). Any comparison involving NULL returns NULL.
pub fn ge_pyspark(&self, other: &Column) -> Column
PySpark-style greater-than-or-equal comparison. Any comparison involving NULL returns NULL.
pub fn lt_pyspark(&self, other: &Column) -> Column
PySpark-style less-than comparison. Any comparison involving NULL returns NULL.
pub fn le_pyspark(&self, other: &Column) -> Column
PySpark-style less-than-or-equal comparison. Any comparison involving NULL returns NULL.
pub fn substr(&self, start: i64, length: Option<i64>) -> Column
Substring with 1-based start (PySpark substring semantics)
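The 1-based indexing can be sketched as follows, assuming the crate follows PySpark's substring(str, pos, len) for positive pos (negative-start handling is not covered here):

```rust
// 1-based substring sketch for positive `start` values; a start of 0 is
// treated as 1, matching PySpark. Not the crate's implementation.
fn substr(s: &str, start: i64, length: Option<i64>) -> String {
    let skip = (start.max(1) - 1) as usize; // 1-based -> 0-based
    let rest = s.chars().skip(skip);
    match length {
        Some(l) if l >= 0 => rest.take(l as usize).collect(),
        Some(_) => String::new(),
        None => rest.collect(),
    }
}

fn main() {
    assert_eq!(substr("Spark", 2, Some(3)), "par");
    assert_eq!(substr("Spark", 1, None), "Spark");
}
```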
pub fn bit_length(&self) -> Column
Bit length of string: byte length * 8 (PySpark bit_length).
pub fn octet_length(&self) -> Column
Length of string in bytes (PySpark octet_length).
pub fn char_length(&self) -> Column
Length of string in characters (PySpark char_length). Alias of length().
pub fn character_length(&self) -> Column
Length of string in characters (PySpark character_length). Alias of length().
pub fn encode(&self, charset: &str) -> Column
Encode string to binary (PySpark encode). Charset: UTF-8. Returns hex string.
pub fn decode(&self, charset: &str) -> Column
Decode binary (hex string) to string (PySpark decode). Charset: UTF-8.
pub fn to_binary(&self, fmt: &str) -> Column
Convert to binary (PySpark to_binary). fmt: ‘utf-8’, ‘hex’. Returns hex string.
pub fn try_to_binary(&self, fmt: &str) -> Column
Try convert to binary; null on failure (PySpark try_to_binary).
pub fn aes_encrypt(&self, key: &str) -> Column
AES encrypt (PySpark aes_encrypt). Key as string; AES-128-GCM. Output hex(nonce||ciphertext).
pub fn aes_decrypt(&self, key: &str) -> Column
AES decrypt (PySpark aes_decrypt). Input hex(nonce||ciphertext). Null on failure.
pub fn try_aes_decrypt(&self, key: &str) -> Column
Try AES decrypt (PySpark try_aes_decrypt). Returns null on failure.
pub fn btrim(&self, trim_str: Option<&str>) -> Column
Trim leading and trailing characters (PySpark btrim). trim_str defaults to whitespace.
pub fn locate(&self, substr: &str, pos: i64) -> Column
Find substring position 1-based, starting at pos (PySpark locate). 0 if not found.
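The 1-based/0-sentinel convention can be sketched in plain Rust (byte-oriented, so exact for ASCII input; not the crate's implementation):

```rust
// locate: 1-based position of `needle` in `haystack`, searching from `pos`
// (itself 1-based); 0 means "not found".
fn locate(haystack: &str, needle: &str, pos: i64) -> i64 {
    let start = (pos.max(1) - 1) as usize;
    if start > haystack.len() {
        return 0;
    }
    match haystack[start..].find(needle) {
        Some(i) => (start + i + 1) as i64, // back to 1-based
        None => 0,
    }
}

fn main() {
    assert_eq!(locate("barbarbar", "bar", 1), 1);
    assert_eq!(locate("barbarbar", "bar", 2), 4); // skips the first hit
    assert_eq!(locate("barbarbar", "baz", 1), 0);
}
```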
pub fn conv(&self, from_base: i32, to_base: i32) -> Column
Base conversion (PySpark conv). num_str from from_base to to_base.
pub fn bit_and(&self, other: &Column) -> Column
Bitwise AND of two integer/boolean columns (PySpark bit_and).
pub fn bit_or(&self, other: &Column) -> Column
Bitwise OR of two integer/boolean columns (PySpark bit_or).
pub fn bit_xor(&self, other: &Column) -> Column
Bitwise XOR of two integer/boolean columns (PySpark bit_xor).
pub fn bit_count(&self) -> Column
Count of set bits in the integer representation (PySpark bit_count).
pub fn assert_true(&self, err_msg: Option<&str>) -> Column
Assert that all boolean values are true; errors otherwise (PySpark assert_true). When err_msg is Some, it is used in the error message when assertion fails.
pub fn bitwise_not(&self) -> Column
Bitwise NOT of an integer/boolean column (PySpark bitwise_not / bitwiseNOT).
pub fn str_to_map(&self, pair_delim: &str, key_value_delim: &str) -> Column
Parse string to map (PySpark str_to_map). “k1:v1,k2:v2” -> map.
pub fn regexp_extract(&self, pattern: &str, group_index: usize) -> Column
Extract first match of regex pattern (PySpark regexp_extract). Group 0 = full match.
pub fn regexp_replace(&self, pattern: &str, replacement: &str) -> Column
Replace first match of regex pattern (PySpark regexp_replace). literal=false for regex.
pub fn replace(&self, search: &str, replacement: &str) -> Column
Replace all occurrences of literal search string with replacement (PySpark replace for literal).
pub fn startswith(&self, prefix: &str) -> Column
True if string starts with prefix (PySpark startswith).
pub fn endswith(&self, suffix: &str) -> Column
True if string ends with suffix (PySpark endswith).
pub fn contains(&self, substring: &str) -> Column
True if string contains substring (literal, not regex). PySpark contains.
pub fn split(&self, delimiter: &str) -> Column
Split string by delimiter (PySpark split). Returns list of strings. Uses literal split so “|” is not interpreted as regex alternation.
pub fn initcap(&self) -> Column
Title case: first letter of each word uppercase (PySpark initcap). Approximates with lowercase when Polars to_titlecase is not enabled.
pub fn regexp_extract_all(&self, pattern: &str) -> Column
Extract all matches of regex (PySpark regexp_extract_all). Returns list of strings.
pub fn regexp_like(&self, pattern: &str) -> Column
Check if string matches regex (PySpark regexp_like / rlike).
pub fn regexp_count(&self, pattern: &str) -> Column
Count of non-overlapping regex matches (PySpark regexp_count).
pub fn regexp_substr(&self, pattern: &str) -> Column
First substring matching regex (PySpark regexp_substr). Null if no match.
pub fn regexp_instr(&self, pattern: &str, group_idx: Option<usize>) -> Column
1-based position of first regex match (PySpark regexp_instr). group_idx 0 = full match; null if no match.
pub fn find_in_set(&self, set_column: &Column) -> Column
1-based index of self in comma-delimited set column (PySpark find_in_set). 0 if not found or self contains comma.
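The lookup rule (including the comma edge case) can be sketched on plain strings; this is an illustration of the documented semantics, not the crate's code:

```rust
// find_in_set: 1-based index of `needle` in a comma-delimited `set`;
// 0 when absent, or when the needle itself contains a comma.
fn find_in_set(needle: &str, set: &str) -> i64 {
    if needle.contains(',') {
        return 0;
    }
    set.split(',')
        .position(|item| item == needle)
        .map(|i| i as i64 + 1)
        .unwrap_or(0)
}

fn main() {
    assert_eq!(find_in_set("ab", "abc,b,ab,c,def"), 3);
    assert_eq!(find_in_set("x", "a,b,c"), 0);
    assert_eq!(find_in_set("a,b", "a,b,c"), 0); // needle contains a comma
}
```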
pub fn repeat(&self, n: i32) -> Column
Repeat each string element n times (PySpark repeat).
pub fn instr(&self, substr: &str) -> Column
Find substring position (1-based; 0 if not found). PySpark instr(col, substr).
pub fn lpad(&self, length: i32, pad: &str) -> Column
Left-pad string to length with pad character (PySpark lpad).
pub fn rpad(&self, length: i32, pad: &str) -> Column
Right-pad string to length with pad character (PySpark rpad).
pub fn translate(&self, from_str: &str, to_str: &str) -> Column
Character-by-character translation (PySpark translate). Replaces each char in from_str with corresponding in to_str; if to_str is shorter, extra from chars are removed.
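The per-character mapping, including the drop-when-shorter rule, looks like this as a standalone sketch (not the crate's implementation):

```rust
// translate: map each char found in `from` to the char at the same index in
// `to`; when `to` is shorter, the unmatched chars are removed.
fn translate(s: &str, from: &str, to: &str) -> String {
    let from: Vec<char> = from.chars().collect();
    let to: Vec<char> = to.chars().collect();
    s.chars()
        .filter_map(|c| match from.iter().position(|&f| f == c) {
            Some(i) => to.get(i).copied(), // None here drops the char
            None => Some(c),
        })
        .collect()
}

fn main() {
    assert_eq!(translate("rust", "ru", "RU"), "RUst");
    assert_eq!(translate("abcba", "abc", "xy"), "xyyx"); // 'c' removed
}
```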
pub fn mask(
&self,
upper_char: Option<char>,
lower_char: Option<char>,
digit_char: Option<char>,
other_char: Option<char>,
) -> Column
Mask string: replace uppercase with upper_char, lowercase with lower_char, digits with digit_char (PySpark mask). Defaults: upper ‘X’, lower ‘x’, digit ‘n’; other chars unchanged.
pub fn split_part(&self, delimiter: &str, part_num: i64) -> Column
Split by delimiter and return 1-based part (PySpark split_part). part_num > 0: from left; part_num < 0: from right; part_num = 0: null; out-of-range: empty string.
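The sign conventions above can be sketched with an `Option` standing in for SQL null (an illustration of the documented semantics, not the crate's code):

```rust
// split_part: 1-based from the left; negative part numbers count from the
// right; 0 yields null (None); out of range yields an empty string.
fn split_part(s: &str, delim: &str, part: i64) -> Option<String> {
    if part == 0 {
        return None;
    }
    let parts: Vec<&str> = s.split(delim).collect();
    let idx = if part > 0 {
        part - 1
    } else {
        parts.len() as i64 + part
    };
    if idx < 0 || idx >= parts.len() as i64 {
        Some(String::new()) // out of range: empty string
    } else {
        Some(parts[idx as usize].to_string())
    }
}

fn main() {
    assert_eq!(split_part("a,b,c", ",", 2), Some("b".to_string()));
    assert_eq!(split_part("a,b,c", ",", -1), Some("c".to_string()));
    assert_eq!(split_part("a,b,c", ",", 5), Some("".to_string()));
    assert_eq!(split_part("a,b,c", ",", 0), None);
}
```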
pub fn substring_index(&self, delimiter: &str, count: i64) -> Column
Substring before/after nth delimiter (PySpark substring_index). count > 0: before nth from left; count < 0: after nth from right.
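The left/right counting rule can be sketched as a join over split parts (an illustration of the documented semantics, not the crate's code):

```rust
// substring_index: everything before the nth delimiter from the left
// (count > 0) or after the nth delimiter from the right (count < 0).
fn substring_index(s: &str, delim: &str, count: i64) -> String {
    if count == 0 {
        return String::new();
    }
    let parts: Vec<&str> = s.split(delim).collect();
    if count > 0 {
        parts[..parts.len().min(count as usize)].join(delim)
    } else {
        let keep = ((-count) as usize).min(parts.len());
        parts[parts.len() - keep..].join(delim)
    }
}

fn main() {
    assert_eq!(substring_index("a.b.c", ".", 2), "a.b");
    assert_eq!(substring_index("a.b.c", ".", -2), "b.c");
    assert_eq!(substring_index("a.b.c", ".", 5), "a.b.c"); // fewer parts: whole string
}
```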
pub fn soundex(&self) -> Column
Soundex code (PySpark soundex). Implemented via map UDF (strsim/soundex crates).
pub fn levenshtein(&self, other: &Column) -> Column
Levenshtein distance to another string (PySpark levenshtein). Implemented via map_many UDF (strsim).
pub fn crc32(&self) -> Column
CRC32 checksum of string bytes (PySpark crc32). Implemented via map UDF (crc32fast).
pub fn xxhash64(&self) -> Column
XXH64 hash of string (PySpark xxhash64). Implemented via map UDF (twox-hash).
pub fn format_number(&self, decimals: u32) -> Column
Format numeric as string with fixed decimal places (PySpark format_number).
pub fn char(&self) -> Column
Int to single-character string (PySpark char / chr). Valid codepoint only.
pub fn unbase64(&self) -> Column
Base64 decode to string (PySpark unbase64). Invalid decode → null.
pub fn sha2(&self, bit_length: i32) -> Column
SHA2 hash; bit_length 256, 384, or 512 (PySpark sha2). Default 256.
pub fn overlay(&self, replace: &str, pos: i64, length: i64) -> Column
Replace substring at 1-based position (PySpark overlay). replace is literal string.
pub fn bround(&self, scale: i32) -> Column
Banker’s rounding - round half to even (PySpark bround).
pub fn multiply(&self, other: &Column) -> Column
Multiply by another column or literal (PySpark multiply). Broadcasts scalars.
pub fn add(&self, other: &Column) -> Column
Add another column or literal (PySpark +). Broadcasts scalars.
pub fn subtract(&self, other: &Column) -> Column
Subtract another column or literal (PySpark -). Broadcasts scalars.
pub fn divide(&self, other: &Column) -> Column
Divide by another column or literal (PySpark /). Broadcasts scalars.
pub fn pow(&self, exp: i64) -> Column
Power (PySpark pow). Exponent can be literal or expression.
pub fn atan2(&self, x: &Column) -> Column
Two-argument arc tangent (y, x) -> angle in radians. PySpark atan2.
pub fn to_degrees(&self) -> Column
Alias for degrees. PySpark toDegrees.
pub fn to_radians(&self) -> Column
Alias for radians. PySpark toRadians.
pub fn cast_to(&self, type_name: &str) -> Result<Column, String>
Cast to the given type (PySpark cast). Fails on invalid conversion.
pub fn try_cast_to(&self, type_name: &str) -> Result<Column, String>
Cast to the given type, null on invalid conversion (PySpark try_cast).
pub fn dayofmonth(&self) -> Column
Alias for day. PySpark dayofmonth.
pub fn quarter(&self) -> Column
Extract quarter (1-4) from date/datetime column (PySpark quarter).
pub fn weekofyear(&self) -> Column
Extract ISO week of year (1-53) (PySpark weekofyear / week).
pub fn dayofweek(&self) -> Column
Day of week: 1 = Sunday, 2 = Monday, …, 7 = Saturday (PySpark dayofweek). Polars weekday is Mon=1..Sun=7; we convert to Sun=1..Sat=7.
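The renumbering from Polars weekdays to Spark weekdays is a single mod-7 shift, sketched here in plain Rust:

```rust
// Polars weekday: Mon=1 .. Sun=7. Spark dayofweek: Sun=1 .. Sat=7.
fn spark_dayofweek(polars_weekday: u32) -> u32 {
    polars_weekday % 7 + 1
}

fn main() {
    assert_eq!(spark_dayofweek(1), 2); // Monday
    assert_eq!(spark_dayofweek(6), 7); // Saturday
    assert_eq!(spark_dayofweek(7), 1); // Sunday
}
```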
pub fn to_date(&self) -> Column
Cast to date (PySpark to_date). Drops time component from datetime/timestamp.
pub fn date_format(&self, format: &str) -> Column
Format date/datetime as string (PySpark date_format). Uses chrono strftime format.
pub fn extract(&self, field: &str) -> Column
Extract field from date/datetime (PySpark extract). field: “year”,“month”,“day”,“hour”,“minute”,“second”,“quarter”,“week”,“dayofweek”,“dayofyear”.
pub fn unix_micros(&self) -> Column
Timestamp to microseconds since epoch (PySpark unix_micros).
pub fn unix_millis(&self) -> Column
Timestamp to milliseconds since epoch (PySpark unix_millis).
pub fn unix_seconds(&self) -> Column
Timestamp to seconds since epoch (PySpark unix_seconds).
pub fn date_add(&self, n: i32) -> Column
Add n days to date/datetime column (PySpark date_add).
pub fn date_sub(&self, n: i32) -> Column
Subtract n days from date/datetime column (PySpark date_sub).
pub fn datediff(&self, other: &Column) -> Column
Number of days between two date/datetime columns, computed as end - start (PySpark datediff).
pub fn last_day(&self) -> Column
Last day of the month for date/datetime column (PySpark last_day).
pub fn timestampadd(&self, unit: &str, amount: &Column) -> Column
Add amount of unit to timestamp (PySpark timestampadd). unit: DAY, HOUR, MINUTE, SECOND, etc.
pub fn timestampdiff(&self, unit: &str, other: &Column) -> Column
Difference between timestamps in given unit (PySpark timestampdiff). unit: DAY, HOUR, MINUTE, SECOND.
pub fn from_utc_timestamp(&self, tz: &str) -> Column
Interpret timestamp as UTC, convert to target timezone (PySpark from_utc_timestamp).
pub fn to_utc_timestamp(&self, tz: &str) -> Column
Interpret timestamp as in tz, convert to UTC (PySpark to_utc_timestamp).
pub fn trunc(&self, format: &str) -> Column
Truncate date/datetime to unit (e.g. “mo”, “wk”, “day”). PySpark trunc.
pub fn add_months(&self, n: i32) -> Column
Add n months to date/datetime column (PySpark add_months). Month-aware.
pub fn months_between(&self, start: &Column, round_off: bool) -> Column
Number of months between end and start dates, as fractional (PySpark months_between). When round_off is true, rounds to 8 decimal places (PySpark default).
pub fn next_day(&self, day_of_week: &str) -> Column
Next date that is the given day of week (e.g. “Mon”, “Tue”) (PySpark next_day).
pub fn unix_timestamp(&self, format: Option<&str>) -> Column
Parse string timestamp to seconds since epoch (PySpark unix_timestamp).
pub fn from_unixtime(&self, format: Option<&str>) -> Column
Convert seconds since epoch to formatted string (PySpark from_unixtime).
pub fn timestamp_seconds(&self) -> Column
Convert seconds since epoch to timestamp (PySpark timestamp_seconds).
pub fn timestamp_millis(&self) -> Column
Convert milliseconds since epoch to timestamp (PySpark timestamp_millis).
pub fn timestamp_micros(&self) -> Column
Convert microseconds since epoch to timestamp (PySpark timestamp_micros).
pub fn date_from_unix_date(&self) -> Column
Days since epoch to date (PySpark date_from_unix_date).
pub fn pmod(&self, divisor: &Column) -> Column
Positive modulus (PySpark pmod). Column method: pmod(self, other).
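Positive modulus differs from Rust's `%` (which takes the sign of the dividend); the usual formulation, equivalent to `rem_euclid` for positive divisors, is:

```rust
// pmod: non-negative remainder for positive divisors.
fn pmod(a: i64, b: i64) -> i64 {
    ((a % b) + b) % b
}

fn main() {
    assert_eq!(pmod(-7, 3), 2); // plain % would give -1
    assert_eq!(pmod(7, 3), 1);
    assert_eq!(pmod(-7, 3), (-7i64).rem_euclid(3));
}
```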
pub fn over(&self, partition_by: &[&str]) -> Column
Apply window partitioning. Returns a new Column with .over(partition_by).
Use after rank(), dense_rank(), row_number(), lag(), lead().
pub fn rank(&self, descending: bool) -> Column
Rank (with ties, gaps). Use with .over(partition_by).
pub fn dense_rank(&self, descending: bool) -> Column
Dense rank (no gaps). Use with .over(partition_by).
pub fn row_number(&self, descending: bool) -> Column
Row number (1, 2, 3 by this column’s order). Use with .over(partition_by).
pub fn lag(&self, n: i64) -> Column
Lag: value from n rows before. Use with .over(partition_by).
pub fn lead(&self, n: i64) -> Column
Lead: value from n rows after. Use with .over(partition_by).
pub fn first_value(&self) -> Column
First value in partition (PySpark first_value). Use with .over(partition_by).
pub fn last_value(&self) -> Column
Last value in partition (PySpark last_value). Use with .over(partition_by).
pub fn percent_rank(&self, partition_by: &[&str], descending: bool) -> Column
Percent rank in partition: (rank - 1) / (count - 1). Window is applied; do not call .over() again.
pub fn cume_dist(&self, partition_by: &[&str], descending: bool) -> Column
Cumulative distribution in partition: row_number / count. Window is applied; do not call .over() again.
pub fn ntile(&self, n: u32, partition_by: &[&str], descending: bool) -> Column
Ntile: bucket 1..n by rank within partition (ceil(rank * n / count)). Window is applied; do not call .over() again.
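The three formulas quoted above (percent_rank, cume_dist, ntile) can be checked in isolation, given a row's rank or row number and the partition size (a sketch of the arithmetic, not the windowed implementation):

```rust
// percent_rank = (rank - 1) / (count - 1)
fn percent_rank(rank: u64, count: u64) -> f64 {
    (rank - 1) as f64 / (count - 1) as f64
}

// cume_dist = row_number / count
fn cume_dist(row_number: u64, count: u64) -> f64 {
    row_number as f64 / count as f64
}

// ntile = ceil(rank * n / count), as integer arithmetic
fn ntile(rank: u64, n: u64, count: u64) -> u64 {
    (rank * n + count - 1) / count
}

fn main() {
    assert_eq!(percent_rank(1, 5), 0.0);
    assert_eq!(percent_rank(5, 5), 1.0);
    assert_eq!(cume_dist(2, 4), 0.5);
    assert_eq!(ntile(1, 4, 8), 1);
    assert_eq!(ntile(8, 4, 8), 4);
}
```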
pub fn nth_value(
&self,
n: i64,
partition_by: &[&str],
descending: bool,
) -> Column
Nth value in partition by order (1-based n). Returns a Column with window already applied; do not call .over() again.
pub fn array_size(&self) -> Column
Number of elements in list (PySpark size / array_size). Returns Int32.
pub fn cardinality(&self) -> Column
Cardinality: number of elements in array/list (PySpark cardinality). Alias for array_size.
pub fn array_contains(&self, value: Expr) -> Column
Check if list contains value (PySpark array_contains).
pub fn array_join(&self, separator: &str) -> Column
Join list of strings with separator (PySpark array_join).
pub fn element_at(&self, index: i64) -> Column
Get element at 1-based index (PySpark element_at). Returns null if out of bounds.
pub fn array_sort(&self) -> Column
Sort list elements (PySpark array_sort). Ascending, nulls last.
pub fn array_distinct(&self) -> Column
Distinct elements in list (PySpark array_distinct). Preserves first-occurrence order.
pub fn mode(&self) -> Column
Mode aggregation - most frequent value (PySpark mode). Uses value_counts sorted by count descending, then first.
pub fn array_slice(&self, start: i64, length: Option<i64>) -> Column
Slice list from start with optional length (PySpark slice). 1-based start.
pub fn explode_outer(&self) -> Column
Explode list; null/empty produces one row with null (PySpark explode_outer).
pub fn posexplode_outer(&self) -> (Column, Column)
Posexplode with null preservation (PySpark posexplode_outer).
pub fn arrays_zip(&self, other: &Column) -> Column
Zip two arrays element-wise into array of structs (PySpark arrays_zip).
pub fn arrays_overlap(&self, other: &Column) -> Column
True if two arrays have any element in common (PySpark arrays_overlap).
pub fn array_agg(&self) -> Column
Collect to array (PySpark array_agg). Alias for implode in group context.
pub fn array_position(&self, value: Expr) -> Column
1-based index of first occurrence of value in list, or 0 if not found (PySpark array_position). Uses Polars list.eval with col(“”) as element (requires polars list_eval feature).
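The 1-based/0-sentinel lookup can be sketched on a plain slice (an illustration of the documented semantics, not the list.eval implementation):

```rust
// array_position: 1-based index of the first occurrence, 0 when absent.
fn array_position<T: PartialEq>(list: &[T], value: &T) -> i64 {
    list.iter()
        .position(|x| x == value)
        .map(|i| i as i64 + 1)
        .unwrap_or(0)
}

fn main() {
    assert_eq!(array_position(&[10, 20, 30], &20), 2);
    assert_eq!(array_position(&[10, 20, 30], &40), 0);
}
```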
pub fn array_compact(&self) -> Column
Remove null elements from list (PySpark array_compact). Preserves order.
pub fn array_remove(&self, value: Expr) -> Column
New list with all elements equal to value removed (PySpark array_remove). Uses list.eval + drop_nulls (requires polars list_eval and list_drop_nulls).
pub fn array_repeat(&self, n: i64) -> Column
Repeat each element n times (PySpark array_repeat). Implemented via map UDF.
pub fn array_flatten(&self) -> Column
Flatten list of lists to one list (PySpark flatten). Implemented via map UDF.
pub fn array_append(&self, elem: &Column) -> Column
Append element to end of list (PySpark array_append).
pub fn array_prepend(&self, elem: &Column) -> Column
Prepend element to start of list (PySpark array_prepend).
pub fn array_insert(&self, pos: &Column, elem: &Column) -> Column
Insert element at 1-based position (PySpark array_insert).
pub fn array_except(&self, other: &Column) -> Column
Elements in first array not in second (PySpark array_except).
pub fn array_intersect(&self, other: &Column) -> Column
Elements in both arrays (PySpark array_intersect).
pub fn array_union(&self, other: &Column) -> Column
Distinct elements from both arrays (PySpark array_union).
pub fn zip_with(&self, other: &Column, merge: Expr) -> Column
Zip two arrays element-wise with merge function (PySpark zip_with). Shorter array padded with null. Merge Expr uses col(“”).struct_().field_by_name(“left”) and field_by_name(“right”).
pub fn array_exists(&self, predicate: Expr) -> Column
True if any list element satisfies the predicate (PySpark exists). Uses list.eval(pred).list().any().
pub fn array_forall(&self, predicate: Expr) -> Column
True if all list elements satisfy the predicate (PySpark forall). Uses list.eval(pred).list().all().
pub fn array_filter(&self, predicate: Expr) -> Column
Filter list elements by predicate (PySpark filter). Keeps elements where predicate is true.
pub fn array_transform(&self, f: Expr) -> Column
Transform list elements by expression (PySpark transform). list.eval(expr).
pub fn array_sum(&self) -> Column
Sum of list elements (PySpark aggregate with sum). Uses list.sum().
pub fn array_aggregate(&self, zero: &Column) -> Column
Array fold/aggregate (PySpark aggregate). Simplified: zero + sum(list). Full (zero, merge, finish) deferred.
pub fn array_mean(&self) -> Column
Mean of list elements (PySpark aggregate with avg). Uses list.mean().
pub fn posexplode(&self) -> (Column, Column)
Explode list with position (PySpark posexplode). Returns (pos_col, value_col). pos is 1-based; uses list.eval(cum_count()).explode() and explode().
pub fn map_keys(&self) -> Column
Extract keys from a map column (PySpark map_keys). Map column is List(Struct{key, value}).
pub fn map_values(&self) -> Column
Extract values from a map column (PySpark map_values). Map column is List(Struct{key, value}).
pub fn map_entries(&self) -> Column
Return map as list of structs {key, value} (PySpark map_entries). Identity for List(Struct) column.
pub fn map_from_arrays(&self, values: &Column) -> Column
Build map from two array columns (keys, values) (PySpark map_from_arrays). Implemented via map_many UDF.
pub fn map_concat(&self, other: &Column) -> Column
Merge two map columns (PySpark map_concat). Last value wins for duplicate keys.
pub fn transform_keys(&self, key_expr: Expr) -> Column
Transform each map key by expr (PySpark transform_keys). key_expr should use col(“”).struct_().field_by_name(“key”).
pub fn transform_values(&self, value_expr: Expr) -> Column
Transform each map value by expr (PySpark transform_values). value_expr should use col(“”).struct_().field_by_name(“value”).
pub fn map_zip_with(&self, other: &Column, merge: Expr) -> Column
Merge two maps by key with merge function (PySpark map_zip_with). Merge Expr uses col(“”).struct_().field_by_name(“value1”) and field_by_name(“value2”).
pub fn map_filter(&self, predicate: Expr) -> Column
Filter map entries by predicate (PySpark map_filter). Keeps key-value pairs where predicate is true. Predicate uses col(“”).struct_().field_by_name(“key”) and field_by_name(“value”) to reference key/value.
pub fn map_from_entries(&self) -> Column
Array of structs {key, value} to map (PySpark map_from_entries). Identity for List(Struct) format.
pub fn map_contains_key(&self, key: &Column) -> Column
True if map contains key (PySpark map_contains_key).
pub fn get_json_object(&self, path: &str) -> Column
Extract JSON path from string column (PySpark get_json_object). Uses Polars str().json_path_match.
pub fn from_json(&self, schema: Option<DataType>) -> Column
Parse string column as JSON into struct (PySpark from_json). Uses Polars str().json_decode.
pub fn to_json(&self) -> Column
Serialize struct column to JSON string (PySpark to_json). Uses Polars struct().json_encode.
pub fn json_array_length(&self, path: &str) -> Column
Length of JSON array at path (PySpark json_array_length). UDF.
pub fn json_object_keys(&self) -> Column
Keys of JSON object (PySpark json_object_keys). Returns list of strings. UDF.
pub fn json_tuple(&self, keys: &[&str]) -> Column
Extract keys from JSON as struct (PySpark json_tuple). UDF. Returns struct with one string field per key.
pub fn from_csv(&self) -> Column
Parse CSV string to struct (PySpark from_csv). Minimal: split by comma, up to 32 columns. UDF.
pub fn parse_url(&self, part: &str, key: Option<&str>) -> Column
Parse URL and extract part (PySpark parse_url). UDF. When part is QUERY/QUERYSTRING and key is Some(k), returns the value for that query parameter only.
pub fn isin(&self, other: &Column) -> Column
Check if column values are in the other column’s list/series (PySpark isin).
pub fn url_decode(&self) -> Column
Percent-decode URL-encoded string (PySpark url_decode). Uses UDF.
pub fn url_encode(&self) -> Column
Percent-encode string for URL (PySpark url_encode). Uses UDF.
pub fn shift_left(&self, n: i32) -> Column
Bitwise left shift (PySpark shiftLeft). col << n = col * 2^n.
pub fn shift_right(&self, n: i32) -> Column
Bitwise signed right shift (PySpark shiftRight). col >> n = col / 2^n.
pub fn shift_right_unsigned(&self, n: i32) -> Column
Bitwise unsigned right shift (PySpark shiftRightUnsigned). Logical shift.
Trait Implementations
Auto Trait Implementations
impl !Freeze for Column
impl !RefUnwindSafe for Column
impl Send for Column
impl Sync for Column
impl Unpin for Column
impl !UnwindSafe for Column