@!metadata(name = "std")
#! Standard library
## Annotates a definition with documentation.
anno doc(content: Text)
## Annotates a definition to be hidden from documentation.
anno hidden()
## Annotates a project with metadata.
##
## This file applies it to itself as `@!metadata(name = "std")`.
anno metadata(name: Text)
## Annotates a project with default runner URL.
# TODO: maybe merge this into metadata (it would be nice to have an optional param)
anno runner(url: Text)
## Annotates a type with traits to `#[derive]` when translating to Rust.
anno rust_derive(macros: [Text])
import convert::*
## Type conversions
module convert {
## Constructs the usual "default" value.
##
## For `Bool` it is `false`.
## For numbers, it is zero.
## For `Text` it is an empty text.
## For tuples, it is a tuple with every field set to its default value.
## For arrays, it is an empty array.
## For enums, it is the first variant.
external func default(): T
where T
## Converts a number to `Int8`.
external func to_int8(T): Int8
where T: AnyNumber
## Converts a number to `Int16`.
external func to_int16(T): Int16
where T: AnyNumber
## Converts a number to `Int32`.
external func to_int32(T): Int32
where T: AnyNumber
## Converts a number to `Int64`.
external func to_int64(T): Int64
where T: AnyNumber
## Converts a number to `Uint8`.
external func to_uint8(T): Uint8
where T: AnyNumber
## Converts a number to `Uint16`.
external func to_uint16(T): Uint16
where T: AnyNumber
## Converts a number to `Uint32`.
external func to_uint32(T): Uint32
where T: AnyNumber
## Converts a number to `Uint64`.
external func to_uint64(T): Uint64
where T: AnyNumber
## Converts a number to `Float32`.
external func to_float32(T): Float32
where T: AnyNumber
## Converts a number to `Float64`.
external func to_float64(T): Float64
where T: AnyNumber
## Converts a value to `Text`.
##
## Accepts numbers, `Bool`, `Text`, `Timestamp`, `Date`, `Time`, and `Decimal`.
external func to_text(T): Text
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
}
import ops::*
## Operators
module ops {
## Multiplies two numbers.
external func mul(T, T): T
where T: AnyNumber
## Divides two numbers.
external func div(T, T): T
where T: AnyNumber
## Division remainder.
external func mod(T, T): T
where T: AnyNumber
## Adds two values of the same type: numbers, `Time`, or `Decimal`.
external func add(T, T): T
where T: AnyNumber | Time | Decimal
## Subtracts two values of the same type: numbers, `Time`, or `Decimal`.
external func sub(T, T): T
where T: AnyNumber | Time | Decimal
## Negates a signed number or a `Time`.
##
## Unsigned integers are not accepted.
external func neg(T): T
where T: Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Time
## Compares two values to determine if the first is less than, equal to, or
## greater than the second.
##
## The comparison functions [eq], [ne], [gt], [lt], [gte], and [lte] are all
## defined in terms of this function.
external func cmp(T, T): Ordering
where T
## Result of a three-way comparison. Returned by [cmp].
type Ordering: enum {less, equal, greater}
## Equality test. Used by `==` operator.
## Yields `true` only when [cmp] reports `.equal`.
func eq(left: T, right: T): Bool
where T
-> (
  match cmp(left, right) {
    .less => false,
    .equal => true,
    .greater => false,
  }
)
## Inequality test. Used by `!=` operator.
## Yields `true` whenever [cmp] reports anything other than `.equal`.
func ne(left: T, right: T): Bool
where T
-> (
  match cmp(left, right) {
    .less => true,
    .equal => false,
    .greater => true,
  }
)
## Strict "greater than" test. Used by `>` operator.
## Yields `true` only when [cmp] reports `.greater` for `left` against `right`.
func gt(left: T, right: T): Bool
where T
-> (
  match cmp(left, right) {
    .less => false,
    .equal => false,
    .greater => true,
  }
)
## Strict "less than" test. Used by `<` operator.
## Yields `true` only when [cmp] reports `.less` for `left` against `right`.
func lt(left: T, right: T): Bool
where T
-> (
  match cmp(left, right) {
    .less => true,
    .equal => false,
    .greater => false,
  }
)
## "Greater than or equal" test. Used by `>=` operator.
## Yields `false` only when [cmp] reports `.less` for `left` against `right`.
func gte(left: T, right: T): Bool
where T
-> (
  match cmp(left, right) {
    .less => false,
    .equal => true,
    .greater => true,
  }
)
## "Less than or equal" test. Used by `<=` operator.
## Yields `false` only when [cmp] reports `.greater` for `left` against `right`.
func lte(left: T, right: T): Bool
where T
-> (
  match cmp(left, right) {
    .less => true,
    .equal => true,
    .greater => false,
  }
)
## Tests if both values are true. Used by `&&` operator.
external func and(Bool, Bool): Bool
## Tests if either of the values is true. Used by `||` operator.
external func or(Bool, Bool): Bool
## Inverts a boolean: `true` becomes `false` and `false` becomes `true`.
## Used by `!` operator.
external func not(Bool): Bool
}
import option::(is_none, is_some, map_or, or_default, or_else)
## Nullable values
module option {
## Tells whether a nullable value holds something.
## Yields `true` for `.some` and `false` for `.none`.
## The inverse of [is_none].
func is_some(value: T?): Bool
where T
-> (
  match value {
    .none => false,
    .some => true,
  }
)
## Tells whether a nullable value is absent.
## Yields `true` for `.none` and `false` for `.some`.
## The inverse of [is_some].
func is_none(value: T?): Bool
where T
-> (
  match value {
    .none => true,
    .some => false,
  }
)
## Unwraps a nullable value, substituting `fallback` when it is `.none`.
func or_else(value: T?, fallback fallback: T): T
where T
-> (
  match value {
    .none => fallback,
    .some(inner) => inner,
  }
)
## Unwraps a nullable value, substituting the type's default when it is `.none`.
##
## The default depends on the type, see `default`.
## For example, it is 0 for numbers and false for booleans.
func or_default(value: T?): T
where T
-> (
  match value {
    .none => default(),
    .some(inner) => inner,
  }
)
## Combines two options into a single option holding a tuple.
## The result is `.some({a, b})` when both inputs are `.some`,
## and `.none` when either of them is `.none`.
# TODO: maybe merge this into std::zip?
func zip(a: A?, b: B?): enum {none, some: {A, B}}
where A, B
-> (
  match a {
    .some(first) => match b {
      .some(second) => .some({first, second}),
      .none => .none,
    },
    .none => .none,
  }
)
## Applies `mapper` to the value inside `.some` and wraps the result in `.some`.
## A `.none` input is passed through as `.none`.
# TODO: maybe merge this into std::map?
func map(value: I?, mapper: func (I): O): O?
where I, O
-> (
  match value {
    .some(inner) => .some(mapper(inner)),
    .none => .none,
  }
)
## Applies `mapper` to the value inside `.some` and returns its result as is.
## Unlike [map], the result of `mapper` is already nullable, so it is not
## wrapped in `.some` again.
## A `.none` input is passed through as `.none`.
# TODO: maybe merge this into std::flat_map?
func flat_map(value: I?, mapper: func (I): O?): O?
where I, O
-> (
  match value {
    .some(inner) => mapper(inner),
    .none => .none,
  }
)
## Applies `mapper` to the value inside `.some` and returns the result.
## When the value is `.none`, returns `fallback` instead.
func map_or(value: I?, fallback fallback: O, mapper: func (I): O): O
where I, O
-> (
  match value {
    .some(inner) => mapper(inner),
    .none => fallback,
  }
)
}
import array::*
## Array functions
##
## Basic array functions, aggregation, and window functions
module array {
## Returns the item at a zero-based `position`, or `.none` if out of bounds.
##
## Used by [find] to pick the first matching item.
external func index(array: [T], position: Int64): T?
where T
## Returns the number of items in an array.
##
## Used by [is_empty].
external func count(array: [T]): Int64
where T
## Tells whether an array has no items, that is, whether its [count] is zero.
func is_empty(array: [T]): Bool
where T
-> (array | count | eq(0))
## Transforms each item using a `mapper` function.
## Returns a new array of the same length.
external func map(array: [I], mapper: func (I): O): [O]
where I, O
## Applies `mapper` to each item and concatenates the resulting arrays.
external func flat_map(array: [I], mapper: func (I): [O]): [O]
where I, O
## Returns only the items for which `condition` returns `true`.
##
## Used by [find] and [contains].
external func filter(array: [T], condition: func (T): Bool): [T]
where T
## Looks up the first item that satisfies `condition`.
## Yields `.none` when no item matches.
##
## Implemented as [filter] followed by [index] at position 0.
func find(array array: [T], condition: func (T): Bool): T?
where T
-> index(filter(array, condition), 0)
## Returns items from index `start` (inclusive) to `end` (exclusive).
external func slice(array: [T], start: Int64, end: Int64): [T]
where T
## Sorts the array in ascending order by the value returned by `key`.
##
## The key must be a number, `Bool`, `Text`, `Timestamp`, `Date`, `Time`, or
## `Decimal`.
external func sort(array: [I], key: func (I): K): [I]
where I, K: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
## Transposes an array of rows into a tuple of columns.
## Each field of the row type becomes a column — an array of that field's values.
## The inverse of [from_columnar].
##
## Used by [map_columnar] and [aggregate].
external func to_columnar(rows: [T]): {for f: F in T do f: [F]}
where T: {..}
## Transposes a tuple of columns into an array of rows.
## The inverse of [to_columnar].
##
## Used by [map_columnar].
external func from_columnar(columnar: {for f: F in T do f: [F]}): [T]
where T: {..}
## Runs `mapper` over the columnar form of an array of rows.
## The rows are converted with [to_columnar], passed to `mapper`,
## and the result is converted back to rows with [from_columnar].
func map_columnar(
  array rows: [I],
  mapper transform: func ({for f: F in I do f: [F]}): {for f: F in O do f: [F]},
): [O]
where I: {..}, O: {..}
-> from_columnar(transform(to_columnar(rows)))
## Collapses an array of rows into one value.
## The rows are converted with [to_columnar] and the columns are handed to
## `mapper`, whose result is returned.
func aggregate(
  array rows: [T], mapper reducer: func ({for f: F in T do f: [F]}): O,
): O
where T: {..}, O
-> reducer(to_columnar(rows))
## Pairs items from `left` and `right` by position.
## The result length equals the shorter of the two inputs.
external func zip(left: [L], right: [R]): [{L, R}]
where L, R
## Groups array items by the key returned by `get_key`.
## Returns groups, each of which contains the `key` and an array of `values`
## in this group.
##
## Used by [group_map] and [distinct].
external func group(array: [I], get_key: func (I): K): [{key: K, values: [I]}]
where I, K
## Groups array items by a key, then maps each group with `mapper`.
##
## Items are grouped by the key returned by `get_key`, as in [group].
## `mapper` is then called for each group with the group's key and the array
## of items in that group. Returns the array of values produced by `mapper`.
func group_map(
  array input: [I],
  get_key get_key: func (I): K,
  mapper mapper: func (K, [I]): O,
): [O]
where I, K, O
-> (input | group(get_key) | map(g -> mapper(g.key, g.values)))
## Removes duplicate items from an array.
##
## Items are grouped by their own value with [group],
## and the key of each group is returned.
func distinct(array: [T]): [T]
where T
-> map(array | group(item -> item), bucket -> bucket.key)
## Concatenates two arrays, `first` items followed by `second` items.
external func append(first: [T], second: [T]): [T]
where T
## Folds every input into an accumulator value by applying an operation,
## returning the final result.
##
## The `operation` takes two arguments: accumulator and an input.
## It returns the value that the accumulator should have in next iteration.
##
## The `initial` value is the accumulator for the first call.
##
## After folding all inputs, the last accumulator is returned.
##
## This function is sometimes also called "reduce" or "inject".
## This function is similar to [scan], but it returns only the final
## accumulator.
external func fold(array: [I], initial: A, operation: func (A, I): A): A
where I, A
## Applies an operation to each input, using the output from the previous
## iteration.
##
## The `operation` takes two arguments: accumulator and an input.
## It returns the value that the accumulator should have in next iteration.
##
## The `initial` value is the accumulator for the first call.
##
## Returns values of all produced accumulators.
##
## This function is similar to [fold], but it returns all accumulators,
## instead of only the final one.
external func scan(array: [I], initial: A, operation: func (A, I): A): [A]
where I, A
## Applies an operation to an array, until it returns an empty array.
## First operation is supplied with the initial array.
## Each following operation is supplied with result of previous operation.
##
## Returns all produced arrays concatenated together, including initial array.
##
## In SQL, this is known as "RECURSIVE CTE" or "recursive join".
external func loop_until_empty(initial: [T], operation: func ([T]): [T]): [T]
where T
## Returns an array of sequential integers
## from start (inclusive) to end (exclusive).
##
## For example: `sequence(0, 3) == [0, 1, 2]`
external func sequence(start: N, end: N): [N]
where N: Int8 | Int16 | Int32 | Int64 | Uint8 | Uint16 | Uint32 | Uint64
# --- Aggregation functions ---
## Finds the minimum value in the array.
## Returns `.none` when array is empty.
external func min([T]): T?
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
## Finds the maximum value in the array.
## Returns `.none` when array is empty.
external func max([T]): T?
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
## Computes the sum of all numbers in an array.
## Returns zero when array is empty.
external func sum([T]): T
where T: AnyNumber | Time
## Computes the arithmetic mean of an array of numbers.
external func mean([T]): Float64
where T: AnyNumber
## Returns `true` if all items in the array are `true`.
external func all([Bool]): Bool
## Returns `true` if any value in the array is `true`.
external func any([Bool]): Bool
## Tells whether `haystack` holds at least one item equal to `needle`.
##
## Keeps only the items equal to `needle` and checks that the remaining
## array is not empty.
func contains(haystack haystack: [T], needle needle: T): Bool
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
-> not(haystack | filter(item -> eq(item, needle)) | is_empty)
# --- Window functions ---
## Shifts array items backwards by an offset.
## Items in front are set to their default value.
##
## For example, `lag(["a", "b", "c"], 1)` is `["", "a", "b"]`.
external func lag(array: [T], offset: Int64): [T]
where T
## Shifts array items forwards by an offset.
## Items in back are set to their default value.
##
## For example, `lead(["a", "b", "c"], 1)` is `["b", "c", ""]`.
external func lead(array: [T], offset: Int64): [T]
where T
## Computes rolling arithmetic mean over an array of numbers.
## Also known as moving average.
##
## Returns an array of same length as the input,
## where each item is the mean of the corresponding input item and
## a number of preceding and following items.
##
## For example, `rolling_mean(..., 1, 2)` computes mean of each item, along with
## 1 preceding and 2 following items, which is 4 items total.
external func rolling_mean(array: [T], preceding: Uint32, following: Uint32): [Float64]
where T: AnyNumber
## Computes rank of each array item.
##
## Rank is the number of items that are less than current item, plus one.
##
## The values range from `1` to `n` (number of items).
## Also known as "min rank".
##
## For example, `rank(["a", "b", "b", "c"])` is `[1, 2, 2, 4]`.
external func rank(array: [T]): [Int32]
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
## Computes dense rank of each array item.
##
## Dense rank is the number of unique items that are less than current item,
## plus one.
##
## The values range from `1` to `n` (number of unique items).
##
## For example, `rank_dense(["a", "b", "b", "c"])` is `[1, 2, 2, 3]`.
external func rank_dense(array: [T]): [Int32]
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
## Computes percentile rank of each array item.
##
## Percentile rank is the item rank rescaled to range from 0.0 to 1.0.
## Equal items have equal percentile rank.
##
## The values range from 0.0 to 1.0.
##
## For example, `rank_percentile(["a", "b", "b", "c"])` is
## `[0.0, 0.33333, 0.33333, 1.0]`.
external func rank_percentile(array: [T]): [Float64]
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
## Computes ECDF (Empirical Cumulative Distribution Function) of array items.
##
## Value of each item i is the number of items that are less than or equal to i,
## divided by the total number of items.
##
## The values range from `1.0/n` to `1.0`.
##
## For example, `cume_dist(["a", "b", "b", "c"])` is
## `[0.25, 0.75, 0.75, 1.0]`.
external func cume_dist(array: [T]): [Float64]
where T: AnyNumber | Bool | Text | Timestamp | Date | Time | Decimal
}
## Mathematical operations
module math {
## Returns the absolute value of a number.
external func abs(T): T
where T: AnyNumber
# func sqrt(val: int64): int64
# func ceil(val: int64): int64
# func floor(val: int64): int64
# func round(val: int64, n_digits: int64): int64
## The mathematical constant π as a 32-bit float.
const pi32: Float32 = 3.1415927
## The mathematical constant π as a 64-bit float.
const pi64: Float64 = 3.141592653589793
# TODO: exponent should probably always be uint32
## Raises `value` to the power of `exponent`.
##
## Both arguments and the result share one type, either `Int64` or `Float64`.
external func pow(value: T, exponent: T): T
where T: Int64 | Float64
# func exp(val: int64): int64
# func ln(val: int64): int64
# func log10(val: int64): int64
# func log(base: int64, val: int64): int64
# func degrees(val: int64): int64
# func radians(val: int64): int64
# func cos(val: int64): int64
# func acos(val: int64): int64
# func sin(val: int64): int64
# func asin(val: int64): int64
# func tan(val: int64): int64
# func atan(val: int64): int64
}
## Text functions
module text {
## Concatenates two text values, `left` followed by `right`.
external func concat(left: Text, right: Text): Text
## Returns the number of characters in a text value.
external func length(Text): Uint32
## Converts ASCII code to its corresponding Unicode character.
## This function always succeeds, because ASCII is a subset of Unicode.
# NOTE(review): `Uint8` also admits codes 128..255, which are not ASCII.
# The behavior for those inputs is not visible here — confirm and document.
external func from_ascii(Uint8): Text
## Joins parts together, placing separator between each pair.
external func join(parts: [Text], separator: Text): Text
## Splits text into substrings at every occurrence of a separator.
##
## For example, `split("1_two_3", "_")` is `["1", "two", "3"]`
external func split(value: Text, separator: Text): [Text]
# func lower(value: Text): Text
# func upper(value: Text): Text
# func ltrim(value: Text): Text
# func rtrim(value: Text): Text
# func trim(value: Text): Text
# func substring(value: Text, offset: int64, length: int64): Text
# func replace(value: Text, pattern: Text, replacement: Text): Text
## Tests if a text starts with a prefix.
external func starts_with(value: Text, prefix: Text): Bool
## Tests if a text contains a pattern.
external func contains(value: Text, pattern: Text): Bool
## Tests if a text ends with a suffix.
external func ends_with(value: Text, suffix: Text): Bool
# func regex_search(haystack: Text, pattern: Text): Bool
}
## File system operations
module fs {
## Reads rows from a Parquet file at `file_name`.
## Returns an array of rows, each of which is a tuple.
external func read_parquet(file_name: Text): [R]
where R: {..}
## Writes `data` to a Parquet file at `file_name`.
## Takes an array of rows, each of which is a tuple, and returns an empty tuple.
external func write_parquet(data: [R], file_name: Text): {}
where R: {..}
}
## SQL interface
##
## Module available only when targeting SQL databases.
## Supported SQL targets are DuckDB (`repr:sql-duckdb`) and PostgreSQL
## (`repr:sql-pg`). Storage details differ between runners; see the
## DuckDB reference and the type representations reference.
module sql {
## Reads rows from a table.
##
## The order of table rows is unknown and might change over repeated invocations.
##
## Table identifier can contain slashes to denote table namespaces.
## Any additional slashes will remain in table name verbatim.
## For example:
## - `from("my_schema/invoices")` will read from `my_schema.invoices`,
## - `from("my_schema/hello/world")` will read from `my_schema."hello/world"`.
external func from(const table_name: Text): [R]
where R: {..}
## Inserts rows into a table.
##
## For table identifier format, see documentation of [module::from].
external func insert(rows: [R], const table_name: Text): {}
where R: {..}
## Updates rows in a table.
##
## The `updater` function is applied to each row in the table. When it returns
## `.none` the row is not updated, and when it returns `.some(new_value)` it is
## updated to the new value.
##
## The function receives the current row values and can use them to decide
## whether to update and what the new values should be.
##
## Example:
## ```lt
## sql::update(
## "users",
## func (u: User) -> if u.age > 65 then (
## .some({status = "senior", ..u})
## ) else (
## .none
## )
## )
## ```
##
## For table identifier format, see documentation of [from].
external func update(const table_name: Text, updater: func (R): R?): {}
where R: {..}
## Evaluates raw SQL.
##
## This function is an escape hatch for accessing SQL directly.
## It should be used as a last resort, because it circumvents type checking
## and can cause program panic on malformed SQL or incorrect resulting type.
##
## Resulting type of the expression must match the SQL representation of the
## Lutra type for the selected runner.
external func raw(const sql_source: Text): R
where R
}
## A boolean value. Backed by an 8-bit primitive.
type Bool(Prim8)
## A signed 8-bit integer.
type Int8(Prim8)
## A signed 16-bit integer.
type Int16(Prim16)
## A signed 32-bit integer.
type Int32(Prim32)
## A signed 64-bit integer.
type Int64(Prim64)
## An unsigned 8-bit integer.
type Uint8(Prim8)
## An unsigned 16-bit integer.
type Uint16(Prim16)
## An unsigned 32-bit integer.
type Uint32(Prim32)
## An unsigned 64-bit integer.
type Uint64(Prim64)
## A 32-bit floating-point number.
type Float32(Prim32)
## A 64-bit floating-point number.
type Float64(Prim64)
## Unicode text of arbitrary length. Encoded as UTF-8.
## Backed by an array of 8-bit primitives.
type Text([Prim8])
## An instant in time. Timestamp without a timezone.
##
## Backed by a signed 64-bit integer, indicating microseconds since the Unix
## epoch (1970-01-01T00:00:00.000 UTC), excluding leap seconds.
# TODO: support for timezone and other time units.
type Timestamp(microseconds: Int64)
## Elapsed days since Unix Epoch (1970-01-01).
##
## Backed by a signed 32-bit integer, representing number of days.
type Date(days_epoch: Int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of seconds.
# type Time32Second(int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of milliseconds.
# type Time32Millisecond(int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of microseconds.
# type Time32Microsecond(int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 64-bit integer, representing number of seconds.
# type Time64Second(int64)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 64-bit integer, representing number of milliseconds.
# type Time64Millisecond(int64)
## Length of time, unrelated to calendar events.
## Can be interpreted as duration or offset from the midnight.
##
## Backed by a signed 64-bit integer, representing number of microseconds.
type Time(microseconds: Int64)
# func to_time64_second(x: D): Time64Second
# where D: Time32Second -> x.0 | to_int64 | Time64Second
# where D: Time32Millisecond -> x.0 | to_int64 | mul(1000) | Time64Second
# where D: Time32Microsecond -> x.0 | to_int64 | mul(1000000) | Time64Second
# where D: Time64Second -> x
# where D: Time64Millisecond -> x.0 | mul(1000) | Time64Second
# where D: Time64Microsecond -> x.0 | mul(1000000) | Time64Second
# func to_time64_millisecond(x: D): Time64Millisecond
# where D: Time32Second -> x.0 | to_int64 | div(1000) | Time64Millisecond
# where D: Time32Millisecond -> x.0 | to_int64 | Time64Millisecond
# where D: Time32Microsecond -> x.0 | to_int64 | mul(1000) | Time64Millisecond
# where D: Time64Second -> x.0 | div(1000) | Time64Millisecond
# where D: Time64Millisecond -> x
# where D: Time64Microsecond -> x.0 | mul(1000) | Time64Millisecond
# func to_time64_microsecond(x: D): Time64Microsecond
# where D: Time32Second -> x.0 | to_int64 | div(1000000) | Time64Microsecond
# where D: Time32Millisecond -> x.0 | to_int64 | div(1000) | Time64Microsecond
# where D: Time32Microsecond -> x.0 | to_int64 | Time64Microsecond
# where D: Time64Second -> x.0 | div(1000000) | Time64Microsecond
# where D: Time64Millisecond -> x.0 | div(1000) | Time64Microsecond
# where D: Time64Microsecond -> x
## Returns offset of a time zone from UTC on a given date.
##
## The offset is positive for time zones that are ahead of UTC
## and negative for time zones that are behind UTC.
# A local date begins earlier, as an instant, in a zone that is ahead of UTC
# than it does in UTC itself. [timestamp::sub] computes `a - b`, so the UTC
# timestamp must be the first operand for such zones to get a positive offset.
func timezone_offset(time_zone tz: Text, date d: Date): Time -> (
  timestamp::sub(
    d | date::to_timestamp("UTC"),
    d | date::to_timestamp(tz),
  )
)
module date {
## Time elapsed between two dates.
##
## Both dates are assumed to be in the same timezone,
## so the duration is measured from one midnight to another.
# 86400000000 is the number of microseconds in a day (24 * 60 * 60 * 1000000),
# which converts the difference in days into the microseconds held by `Time`.
func sub(a: Date, b: Date): Time -> (
  a.days_epoch | project::sub(b.days_epoch) | to_int64 | mul(86400000000) | Time
)
## Computes the timestamp of a local date at a timezone.
##
## The counterpart of [timestamp::to_date]. Used by [timezone_offset].
external func to_timestamp(date: Date, time_zone: Text): Timestamp
## Converts a date into year, month, and day numbers.
## Month and day start with 1.
external func to_year_month_day(Date): {year: Int32, month: Uint8, day: Uint8}
}
module timestamp {
## Computes the local date of a timestamp at a timezone.
##
## The counterpart of [date::to_timestamp].
external func to_date(timestamp: Timestamp, time_zone: Text): Date
## Elapsed `Time` from `b` to `a`, both given as `Timestamp`s.
## The result is `a - b`, so it is positive when `a` is later than `b`.
func sub(a: Timestamp, b: Timestamp): Time -> (a.0 | project::sub(b.0) | Time)
}
## Exact decimal value with a fixed `SCALE`, which is the number of digits past
## the decimal point.
## Currently, `SCALE` is 2. This will be configurable in the future.
## Can hold values from `-10**(19-SCALE)` to `10**(19-SCALE)`.
# NOTE(review): the bounds above look approximate. If the 64-bit backing value
# is signed, its range ends slightly below 10**19 — confirm the exact limits.
# TODO: implement const type param for SCALE.
type Decimal(Prim64)