# --- Utils ---
## Constructs the usual "default" value of a type.
##
## Takes no arguments; the result type `T` is the only input.
##
## For `bool`, it is `false`.
## For numbers, it is zero.
## For `text`, it is an empty text.
## For tuples, it is a tuple with every field set to its default value.
## For arrays, it is an empty array.
## For enums, it is the first variant.
func default(): T
where T
# --- Casts ---
## Converts a number to `int8`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of values that do not fit into `int8` is not
## specified here — confirm against the implementation.
func to_int8(x: T): int8
where T: number
## Converts a number to `int16`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of values that do not fit into `int16` is not
## specified here — confirm against the implementation.
func to_int16(x: T): int16
where T: number
## Converts a number to `int32`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of values that do not fit into `int32` is not
## specified here — confirm against the implementation.
func to_int32(x: T): int32
where T: number
## Converts a number to `int64`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of values that do not fit into `int64` is not
## specified here — confirm against the implementation.
func to_int64(x: T): int64
where T: number
## Converts a number to `uint8`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of negative values and of values that do not fit
## into `uint8` is not specified here — confirm against the implementation.
func to_uint8(x: T): uint8
where T: number
## Converts a number to `uint16`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of negative values and of values that do not fit
## into `uint16` is not specified here — confirm against the implementation.
func to_uint16(x: T): uint16
where T: number
## Converts a number to `uint32`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of negative values and of values that do not fit
## into `uint32` is not specified here — confirm against the implementation.
func to_uint32(x: T): uint32
where T: number
## Converts a number to `uint64`.
##
## Accepts a value of any `number` type.
## NOTE(review): handling of negative values and of values that do not fit
## into `uint64` is not specified here — confirm against the implementation.
func to_uint64(x: T): uint64
where T: number
## Converts a number to `float32`.
##
## Accepts a value of any `number` type.
## NOTE(review): rounding of values that `float32` cannot represent exactly
## is not specified here — confirm against the implementation.
func to_float32(x: T): float32
where T: number
## Converts a number to `float64`.
##
## Accepts a value of any `number` type.
## NOTE(review): rounding of values that `float64` cannot represent exactly
## is not specified here — confirm against the implementation.
func to_float64(x: T): float64
where T: number
## Converts a value to `text`.
##
## Accepts a value of any `primitive` type.
## NOTE(review): the exact textual format (e.g. of floats) is not specified
## here — confirm against the implementation.
func to_text(x: T): text
where T: primitive
# --- Operators ---
## Multiplies two numbers.
##
## Both operands and the result share the same numeric type `T`.
## NOTE(review): overflow behavior is not specified here — confirm.
func mul(left: T, right: T): T
where T: number
## Divides `left` by `right`.
##
## Both operands and the result share the same numeric type `T`.
## NOTE(review): rounding of integer division and the result of division by
## zero are not specified here — confirm.
func div(left: T, right: T): T
where T: number
## Computes the remainder of dividing `left` by `right`.
##
## Both operands and the result share the same numeric type `T`.
## NOTE(review): sign of the result for negative operands and behavior for
## a zero `right` are not specified here — confirm.
func mod(left: T, right: T): T
where T: number
## Adds two numbers.
##
## Both operands and the result share the same numeric type `T`.
## NOTE(review): overflow behavior is not specified here — confirm.
func add(left: T, right: T): T
where T: number
## Subtracts `right` from `left`.
##
## Both operands and the result share the same numeric type `T`.
## NOTE(review): overflow/underflow behavior (notably for unsigned types) is
## not specified here — confirm.
func sub(left: T, right: T): T
where T: number
## Negates a number.
##
## Only signed integer types and float types are accepted; unsigned integer
## types are excluded by the type constraint.
func neg(x: T): T
where T: int8 | int16 | int32 | int64 | float32 | float64
## Compares two values to determine if the first is less than, equal to, or
## greater than the second.
##
## Returns the corresponding variant of `Ordering`.
func cmp(a: T, b: T): Ordering
where T: primitive
## Result of comparing two values, as returned by `cmp`.
type Ordering: enum {less, equal, greater}
## Tests if two values are equal. Used by `==` operator.
##
## Defined in terms of `cmp`: true only when the ordering is `.equal`.
func eq(left: T, right: T): bool
where T: primitive
-> (
  match cmp(left, right) {
    .less => false,
    .equal => true,
    .greater => false,
  }
)
## Tests if two values differ. Used by `!=` operator.
##
## Defined in terms of `cmp`: false only when the ordering is `.equal`.
func ne(left: T, right: T): bool
where T: primitive
-> (
  match cmp(left, right) {
    .less => true,
    .equal => false,
    .greater => true,
  }
)
## Tests if the left value is greater than the right. Used by `>` operator.
##
## Defined in terms of `cmp`: true only when the ordering is `.greater`.
func gt(left: T, right: T): bool
where T: primitive
-> (
  match cmp(left, right) {
    .less => false,
    .equal => false,
    .greater => true,
  }
)
## Tests if the left value is less than the right. Used by `<` operator.
##
## Defined in terms of `cmp`: true only when the ordering is `.less`.
func lt(left: T, right: T): bool
where T: primitive
-> (
  match cmp(left, right) {
    .less => true,
    .equal => false,
    .greater => false,
  }
)
## Tests if the left value is greater than or equal to the right.
## Used by `>=` operator.
##
## Defined in terms of `cmp`: false only when the ordering is `.less`.
func gte(left: T, right: T): bool
where T: primitive
-> (
  match cmp(left, right) {
    .less => false,
    .equal => true,
    .greater => true,
  }
)
## Tests if the left value is less than or equal to the right.
## Used by `<=` operator.
##
## Defined in terms of `cmp`: false only when the ordering is `.greater`.
func lte(left: T, right: T): bool
where T: primitive
-> (
  match cmp(left, right) {
    .less => true,
    .equal => true,
    .greater => false,
  }
)
## Tests if both values are true. Used by `&&` operator.
##
## NOTE(review): whether the right operand is evaluated when the left one is
## false (short-circuiting) is not visible here — confirm.
func and(left: bool, right: bool): bool
## Tests if either of the values is true. Used by `||` operator.
##
## NOTE(review): whether the right operand is evaluated when the left one is
## true (short-circuiting) is not visible here — confirm.
func or(left: bool, right: bool): bool
## Inverts a boolean: `true` becomes `false` and vice versa.
## Used by `!` operator.
func not(expr: bool): bool
# --- Option functions ---
## Functions for working with options, i.e. values of type
## `enum {none, some: T}`.
module option {
  ## Tests if the option is the `.some` variant.
  func is_some(value: enum {none, some: T}): bool
  where T
  -> (
    match value {
      .none => false,
      .some => true,
    }
  )
  ## Tests if the option is the `.none` variant.
  func is_none(value: enum {none, some: T}): bool
  where T
  -> (
    match value {
      .none => true,
      .some => false,
    }
  )
  ## Unwraps the value in `.some`.
  ## When the option is `.none`, returns the `fallback` value instead.
  func or_else(value: enum {none, some: T}, fallback: T): T
  where T
  -> (
    match value {
      .none => fallback,
      .some(inner) => inner,
    }
  )
  ## Unwraps the value in `.some`.
  ## When the option is `.none`, returns the default value instead.
  ##
  ## Default value depends on the type. See [super::default].
  ## For numbers it is 0, for booleans it is false.
  func or_default(value: enum {none, some: T}): T
  where T
  -> (
    match value {
      .none => super::default(),
      .some(inner) => inner,
    }
  )
  ## Zips two options together into an option of a tuple.
  ## Returns `.some({a, b})` only if both `a` and `b` are `.some`,
  ## and `.none` otherwise.
  # TODO: maybe merge this into std::zip?
  func zip(
    a: enum {none, some: A},
    b: enum {none, some: B},
  ): enum {none, some: {A, B}}
  where A, B
  -> (
    match a {
      .some(a) => match b {
        .some(b) => .some({a, b}),
        .none => .none,
      },
      .none => .none,
    }
  )
  ## Applies `mapper` to the value in `.some` and wraps the result in `.some`.
  ## Returns `.none` if the value is `.none`.
  # TODO: maybe merge this into std::map?
  func map(
    value: enum {none, some: I},
    mapper: func (I): O,
  ): enum {none, some: O}
  where I, O
  -> (
    match value {
      .some(inner) => .some(mapper(inner)),
      .none => .none,
    }
  )
  ## Applies `mapper`, which itself returns an option, to the value in `.some`
  ## and returns its result directly, without wrapping it again.
  ## Returns `.none` if the value is `.none`.
  # TODO: maybe merge this into std::flat_map?
  func flat_map(
    value: enum {none, some: I},
    mapper: func (I): enum {none, some: O},
  ): enum {none, some: O}
  where I, O
  -> (
    match value {
      .some(inner) => mapper(inner),
      .none => .none,
    }
  )
  ## Applies `mapper` to the value in `.some` and returns the result.
  ## When the value is `.none`, returns `fallback` instead.
  func map_or(
    value: enum {none, some: I},
    fallback: O,
    mapper: func (I): O,
  ): O
  where I, O
  -> (
    match value {
      .some(inner) => mapper(inner),
      .none => fallback,
    }
  )
}
# --- Array functions ---
## Retrieves the item at a position in the array, as an option.
##
## NOTE(review): positions appear to be zero-based (`find` takes its result
## with `index(0)`), and an out-of-bounds position presumably yields `.none`
## — confirm against the implementation.
func index(array: [T], position: int64): enum {none, some: T}
where T
## Returns the number of items in the array.
##
## An array with a count of zero is what `is_empty` treats as empty.
func count(array: [T]): int64
where T
## Tests if the array has no items, i.e. its `count` is equal to zero.
func is_empty(array: [T]): bool
where T
-> (array | count | eq(0))
## Applies `mapper` to items of the array, producing an array of results.
##
## NOTE(review): presumably one output per input, in the same order — confirm.
func map(array: [I], mapper: func (I): O): [O]
where I, O
## Applies `mapper`, which returns an array for each item, and produces
## a single flat array of outputs.
##
## NOTE(review): presumably the per-item arrays are concatenated in input
## order — confirm.
func flat_map(array: [I], mapper: func (I): [O]): [O]
where I, O
## Selects items of the array using a `condition`.
##
## NOTE(review): presumably keeps the items for which `condition` returns
## `true`, in their original order — confirm.
func filter(array: [T], condition: func (T): bool): [T]
where T
## Finds an item that satisfies a `condition`.
##
## Filters the array by `condition` and takes the item at position 0 of
## the result, as an option.
func find(array: [T], condition: func (T): bool): enum {none, some: T}
where T
-> index(filter(array, condition), 0)
## Returns a portion of the array delimited by `start` and `end` positions.
##
## NOTE(review): presumably `start` is inclusive and `end` is exclusive;
## handling of negative or out-of-bounds positions is not specified here
## — confirm.
func slice(array: [T], start: int64, end: int64): [T]
where T
## Sorts array items by a key, which is computed for each item by `key`.
##
## The key must be a `primitive`.
## NOTE(review): presumably ascending order; stability for equal keys is not
## specified here — confirm.
func sort(array: [I], key: func (I): K): [I]
where I, K: primitive
## Converts an array of tuples (rows) into a tuple of arrays (columns).
##
## The result has the same fields as the row type `T`, but each field of
## type `F` becomes an array `[F]`.
func to_columnar(rows: [T]): {for f: F in T do f: [F]}
where T: {..}
## Converts a tuple of arrays (columns) into an array of tuples (rows).
##
## This is the reverse direction of `to_columnar`.
## NOTE(review): behavior for columns of different lengths is not specified
## here — confirm.
func from_columnar(columnar: {for f: F in T do f: [F]}): [T]
where T: {..}
## Maps an array of tuples using a function that operates on columns.
##
## The rows are converted to columnar form with `to_columnar`, passed to
## `mapper`, and the columns it returns are converted back to rows with
## `from_columnar`.
func map_columnar(
  x: [I],
  mapper: func ({for f: F in I do f: [F]}): {for f: F in O do f: [F]},
): [O]
where I: {..}, O: {..}
-> from_columnar(mapper(to_columnar(x)))
## Computes a single value from an array of tuples, using a function that
## operates on columns.
##
## The rows are converted to columnar form with `to_columnar` and passed to
## `mapper`, whose result is returned as is.
func aggregate(x: [T], mapper: func ({for f: F in T do f: [F]}): O): O
where T: {..}, O
-> mapper(to_columnar(x))
## Combines two arrays into an array of tuples, pairing items of `left`
## with items of `right`.
##
## NOTE(review): presumably items are paired by position; behavior for
## arrays of different lengths is not specified here — confirm.
func zip(left: [L], right: [R]): [{L, R}]
where L, R
## Groups array items by a key, which is computed for each item by `get_key`.
## Returns groups, which contain the key and an array of values in this group.
##
## NOTE(review): order of the groups and of values within a group is not
## specified here — confirm.
func group(input: [I], get_key: func (I): K): [{key: K, values: [I]}]
where I, K
## Groups array items by a key and maps each group into an output value.
##
## Items are grouped with `group`, then `mapper` is called for every group
## with the group key and the array of values in that group.
## Returns the array of values produced by `mapper`.
func group_map(input: [I], get_key: func (I): K, mapper: func (K, [I]): O): [O]
where I, K, O
-> map(group(input, get_key), g -> mapper(g.key, g.values))
## Deduplicates array items.
##
## Items are grouped by their own value and the key of each group is kept.
func distinct(array: [T]): [T]
where T
-> map(group(array, item -> item), g -> g.key)
## Combines two arrays of the same item type into one array.
##
## NOTE(review): presumably items of `top` are followed by items of `bottom`
## — confirm.
func append(top: [T], bottom: [T]): [T]
where T
## Folds every input into an accumulator value by applying an operation,
## returning the final result.
##
## The `operation` takes two arguments: the accumulator and an input.
## It returns the value that the accumulator should have in the next
## iteration.
##
## The `initial` value is the accumulator for the first call.
##
## After folding all inputs, the last accumulator is returned.
##
## This function is sometimes also called "reduce" or "inject".
## This function is similar to [std::scan], but it returns only the final
## accumulator.
func fold(inputs: [I], initial: A, operation: func (A, I): A): A
where I, A
## Applies an operation to each input, using the output from the previous
## iteration.
##
## The `operation` takes two arguments: the accumulator and an input.
## It returns the value that the accumulator should have in the next
## iteration.
##
## The `initial` value is the accumulator for the first call.
##
## Returns values of all produced accumulators.
## NOTE(review): whether `initial` itself is part of the result is not
## specified here — confirm.
##
## This function is similar to [std::fold], but it returns all accumulators,
## instead of only the final one.
func scan(inputs: [I], initial: A, operation: func (A, I): A): [A]
where I, A
# func apply(
# value: I,
# function: func (I): O
# ): O
# where I, O -> function(value)
## Applies an operation to an array repeatedly, until it returns an empty
## array.
## The first operation is supplied with the initial array.
## Each following operation is supplied with the result of the previous
## operation.
##
## Returns all produced arrays concatenated together, including the initial
## array.
##
## In SQL, this is known as "RECURSIVE CTE" or "recursive join".
func apply_until_empty(initial: [T], operation: func ([T]): [T]): [T]
where T
## Returns an array of sequential integers
## from start (inclusive) to end (exclusive).
##
## Only integer types are accepted, and `start`, `end`, and the items of
## the result all share the same type.
##
## For example: `sequence(0, 3) == [0, 1, 2]`
## NOTE(review): the result when `end` is not greater than `start` is not
## specified here — confirm.
func sequence(start: N, end: N): [N]
where N: int8 | int16 | int32 | int64 | uint8 | uint16 | uint32 | uint64
# --- Aggregation functions ---
## Finds the minimum value in the array.
## Returns `.none` when the array is empty.
##
## Items must be of a `primitive` type.
func min(array: [T]): enum {none, some: T}
where T: primitive
## Finds the maximum value in the array.
## Returns `.none` when the array is empty.
##
## Items must be of a `primitive` type.
func max(array: [T]): enum {none, some: T}
where T: primitive
## Computes the sum of all numbers in an array.
## Returns zero when the array is empty.
##
## The result has the same numeric type as the items.
func sum(column: [T]): T
where T: number
## Computes the arithmetic mean of an array of numbers.
##
## The result is always `float64`, regardless of the item type.
## NOTE(review): the result for an empty array is not specified here
## — confirm.
func mean(array: [T]): float64
where T: number
## Returns `true` if all items in the array are `true`.
##
## NOTE(review): the result for an empty array is not specified here
## — confirm.
func all(values: [bool]): bool
## Returns `true` if any item in the array is `true`.
##
## NOTE(review): the result for an empty array is not specified here
## — confirm.
func any(values: [bool]): bool
## Returns `true` if the haystack array contains an item equal to `needle`.
##
## The haystack is filtered down to items equal to `needle`; the result is
## `true` when that filtered array is not empty.
func contains(haystack: [T], needle: T): bool
where T: primitive
-> not(is_empty(filter(haystack, item -> eq(item, needle))))
# --- Window functions ---
## Shifts array items backwards by an offset.
## Items in front are set to their default value.
##
## For example, `lag(["a", "b", "c"], 1)` is `["", "a", "b"]`.
## NOTE(review): behavior for a negative offset or an offset larger than
## the array is not specified here — confirm.
func lag(array: [T], offset: int64): [T]
where T
## Shifts array items forwards by an offset.
## Items in back are set to their default value.
##
## For example, `lead(["a", "b", "c"], 1)` is `["b", "c", ""]`.
## NOTE(review): behavior for a negative offset or an offset larger than
## the array is not specified here — confirm.
func lead(array: [T], offset: int64): [T]
where T
## Computes rolling arithmetic mean over an array of numbers.
## Also known as moving average.
##
## Returns an array of the same length as the input,
## where each item is the mean of the corresponding input item and
## a number of preceding and following items.
##
## For example, `rolling_mean(..., 1, 2)` computes mean of each item, along with
## 1 preceding and 2 following items, which is 4 items total.
## NOTE(review): near the ends of the array fewer neighbors exist; how the
## mean is computed there is not specified here — confirm.
func rolling_mean(array: [T], preceding: uint32, following: uint32): [float64]
where T: number
## Computes rank of each array item.
##
## Rank is the number of items that are less than the current item, plus one.
## Equal items receive the same rank.
##
## The values range from `1` to `n` (number of items).
## Also known as "min rank".
##
## For example, `rank(["a", "b", "b", "c"])` is `[1, 2, 2, 4]`.
func rank(array: [T]): [int32]
where T: primitive
## Computes dense rank of each array item.
##
## Dense rank is the number of unique items that are less than the current
## item, plus one. Equal items receive the same rank.
##
## The values range from `1` to `n` (number of unique items).
##
## For example, `rank_dense(["a", "b", "b", "c"])` is `[1, 2, 2, 3]`.
func rank_dense(array: [T]): [int32]
where T: primitive
## Computes percentile rank of each array item.
##
## Percentile rank is the item rank rescaled to range from 0.0 to 1.0.
## Equal items receive the same percentile rank.
## NOTE(review): presumably computed as `(rank - 1) / (n - 1)`, which matches
## the example below — confirm against the implementation.
##
## For example, `rank_percentile(["a", "b", "b", "c"])` is
## `[0.0, 0.33333, 0.33333, 1.0]`.
func rank_percentile(array: [T]): [float64]
where T: primitive
## Computes ECDF (Empirical Cumulative Distribution Function) of array items.
##
## Value of each item i is the number of items that are less than or equal to i,
## divided by the total number of items.
##
## The values range from `1.0/n` to `1.0`.
##
## For example, `cume_dist(["a", "b", "b", "c"])` is
## `[0.25, 0.75, 0.75, 1.0]`.
func cume_dist(array: [T]): [float64]
where T: primitive
## Mathematical functions and constants.
module math {
## Absolute value.
##
## The result has the same numeric type as the input.
func abs(val: T): T
where T: number
# func sqrt(val: int64): int64
# func ceil(val: int64): int64
# func floor(val: int64): int64
# func round(val: int64, n_digits: int64): int64
## The constant pi, as `float32`.
const pi32: float32 = 3.1415927
## The constant pi, as `float64`.
const pi64: float64 = 3.141592653589793
## Raises `val` to the power of `exponent`.
##
## Value, exponent and result share the same type, which is either `int64`
## or `float64`.
## NOTE(review): behavior for negative exponents on `int64` and for overflow
## is not specified here — confirm.
# TODO: exponent should probably always be uint32
func pow(val: T, exponent: T): T
where T: int64 | float64
# func exp(val: int64): int64
# func ln(val: int64): int64
# func log10(val: int64): int64
# func log(base: int64, val: int64): int64
# func degrees(val: int64): int64
# func radians(val: int64): int64
# func cos(val: int64): int64
# func acos(val: int64): int64
# func sin(val: int64): int64
# func asin(val: int64): int64
# func tan(val: int64): int64
# func atan(val: int64): int64
}
## Text functions
module text {
## Concatenates two texts into one.
##
## NOTE(review): presumably `left` followed by `right` — confirm.
func concat(left: text, right: text): text
## Returns the length of a text.
##
## NOTE(review): the unit of length (bytes, code points, ...) is not
## specified here — confirm.
func length(value: text): uint32
## Converts ASCII code to its corresponding Unicode character.
## This function always succeeds, because ASCII is a subset of Unicode.
##
## NOTE(review): the `uint8` parameter also admits codes 128 to 255, which
## are outside of ASCII; behavior for those is not specified here — confirm.
func from_ascii(ascii: uint8): text
## Joins parts together, placing separator between each pair.
func join(parts: [text], separator: text): text
## Splits text into substrings at every occurrence of a separator.
##
## For example, `split("1_two_3", "_")` is `["1", "two", "3"]`.
func split(value: text, separator: text): [text]
# func lower(value: text): text
# func upper(value: text): text
# func ltrim(value: text): text
# func rtrim(value: text): text
# func trim(value: text): text
# func substring(value: text, offset: int64, length: int64): text
# func replace(value: text, pattern: text, replacement: text): text
## Tests if a text starts with a prefix.
func starts_with(value: text, prefix: text): bool
## Tests if a text contains a pattern.
##
## NOTE(review): presumably `pattern` is matched as a literal substring, not
## as a regular expression — confirm.
func contains(value: text, pattern: text): bool
## Tests if a text ends with a suffix.
func ends_with(value: text, suffix: text): bool
# func regex_search(haystack: text, pattern: text): bool
}
## File system functions.
module fs {
## Reads an array of rows (tuples) from a Parquet file.
##
## NOTE(review): how `file_name` is resolved and what happens when the file
## does not match the row type `R` is not specified here — confirm.
func read_parquet(file_name: text): [R]
where R: {..}
## Writes an array of rows (tuples) into a Parquet file.
## Returns an empty tuple.
##
## NOTE(review): whether an existing file is overwritten is not specified
## here — confirm.
func write_parquet(data: [R], file_name: text): {}
where R: {..}
}
## Module available only when targeting SQL databases.
## Currently this includes only `sql-pg` format.
module sql {
## Reads rows from a table.
##
## The order of table rows is unknown and might change over repeated invocations.
##
## Table identifier can contain slashes to denote table namespaces.
## Any additional slashes will remain in table name verbatim.
## For example:
## - `from("my_schema/invoices")` will read from `my_schema.invoices`,
## - `from("my_schema/hello/world")` will read from `my_schema."hello/world"`.
func from(const table_identifier: text): [R]
where R: {..}
## Inserts rows into a table. Returns an empty tuple.
##
## For table identifier format, see documentation of [std::sql::from].
func insert(rows: [R], const table_identifier: text): {}
where R: {..}
## Updates rows in a table. Returns an empty tuple.
##
## The `updater` function is applied to each row in the table. When it returns
## `.none` the row is not updated, and when it returns `.some(new_value)` it is
## updated to the new value.
##
## The function receives the current row values and can use them to decide
## whether to update and what the new values should be.
##
## Example:
## ```lt
## std::sql::update(
##   "users",
##   func (u: User) -> if u.age > 65 then (
##     .some({status = "senior", ..u})
##   ) else (
##     .none
##   )
## )
## ```
##
## For table identifier format, see documentation of [std::sql::from].
func update(
const table: text,
updater: func (R): enum {none, some: R},
): {}
where R: {..}
## Evaluates raw SQL.
##
## This function is an escape hatch for accessing SQL directly.
## It should be used as a last resort, because it circumvents type checking
## and can cause program panic on malformed SQL or incorrect resulting type.
##
## Resulting type of the expression must match the "SQL representation"
## of the Lutra type:
## - primitives -> a single column,
## - tuples -> one column for each tuple field,
## - nested tuples -> unpacked into parent relation,
## - arrays -> columns of the inner object,
## - enums -> one tag column, followed by columns of each of the variants,
## - enums that are the option -> one nullable column.
func raw(const sql_source: text): R
where R
}
## An instant in time. Timestamp without a timezone.
##
## Backed by a signed 64-bit integer, indicating microseconds since the Unix
## epoch (1970-01-01T00:00:00.000 UTC), excluding leap seconds.
## NOTE(review): presumably negative values denote instants before the epoch
## — confirm.
# TODO: support for timezone and other time units.
type Timestamp(microseconds: int64)
## A calendar date, expressed as elapsed days since Unix Epoch (1970-01-01).
##
## Backed by a signed 32-bit integer, representing number of days.
## NOTE(review): presumably negative values denote dates before the epoch
## — confirm.
type Date(days_epoch: int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of seconds.
# type Time32Second(int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of milliseconds.
# type Time32Millisecond(int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of microseconds.
# type Time32Microsecond(int32)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 64-bit integer, representing number of seconds.
# type Time64Second(int64)
# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 64-bit integer, representing number of milliseconds.
# type Time64Millisecond(int64)
## Length of time, unrelated to calendar events.
## Can be interpreted as a duration or as an offset from midnight.
##
## Backed by a signed 64-bit integer, representing number of microseconds.
type Time(microseconds: int64)
# func to_time64_second(x: D): Time64Second
# where D: Time32Second -> x.0 | to_int64 | Time64Second
# where D: Time32Millisecond -> x.0 | to_int64 | mul(1000) | Time64Second
# where D: Time32Microsecond -> x.0 | to_int64 | mul(1000000) | Time64Second
# where D: Time64Second -> x
# where D: Time64Millisecond -> x.0 | mul(1000) | Time64Second
# where D: Time64Microsecond -> x.0 | mul(1000000) | Time64Second
# func to_time64_millisecond(x: D): Time64Millisecond
# where D: Time32Second -> x.0 | to_int64 | div(1000) | Time64Millisecond
# where D: Time32Millisecond -> x.0 | to_int64 | Time64Millisecond
# where D: Time32Microsecond -> x.0 | to_int64 | mul(1000) | Time64Millisecond
# where D: Time64Second -> x.0 | div(1000) | Time64Millisecond
# where D: Time64Millisecond -> x
# where D: Time64Microsecond -> x.0 | mul(1000) | Time64Millisecond
# func to_time64_microsecond(x: D): Time64Microsecond
# where D: Time32Second -> x.0 | to_int64 | div(1000000) | Time64Microsecond
# where D: Time32Millisecond -> x.0 | to_int64 | div(1000) | Time64Microsecond
# where D: Time32Microsecond -> x.0 | to_int64 | Time64Microsecond
# where D: Time64Second -> x.0 | div(1000000) | Time64Microsecond
# where D: Time64Millisecond -> x.0 | div(1000) | Time64Microsecond
# where D: Time64Microsecond -> x
## Adds two lengths of time.
##
## The underlying microsecond counts are added and wrapped back into `Time`.
func add_time(a: Time, b: Time): Time -> (
  a.0 | add(b.0) | Time
)
## Computes the length of time between two timestamps, as `a` minus `b`.
##
## The underlying microsecond counts are subtracted and wrapped into `Time`.
func sub_timestamp(a: Timestamp, b: Timestamp): Time -> (
  a.0 | sub(b.0) | Time
)
## Returns offset of a time zone from UTC on a given date.
##
## Computed as the timestamp of date `d` at `time_zone`, minus the timestamp
## of the same date at "UTC".
## NOTE(review): for time zones ahead of UTC this difference is presumably
## negative, which is the opposite of the usual "UTC+N" notation — confirm
## that this is the intended sign.
func timezone_offset(time_zone: text, d: Date): Time -> (
  date::to_timestamp(d, time_zone)
  | sub_timestamp(date::to_timestamp(d, "UTC"))
)
## Functions operating on [super::Date].
module date {
  ## Computes time duration between two dates, as `a` minus `b`.
  ##
  ## Dates are assumed to be in the same timezone.
  ## Duration is measured from one midnight to another.
  ##
  ## The difference in days is widened to `int64` and multiplied by the
  ## number of microseconds in a day (86400000000).
  func sub(a: super::Date, b: super::Date): super::Time -> (
    a.days_epoch
    | super::sub(b.days_epoch)
    | super::to_int64
    | super::mul(86400000000)
    | super::Time
  )
  ## Computes the timestamp of a local date at a timezone.
  func to_timestamp(date: super::Date, time_zone: text): super::Timestamp
  ## Converts a date into year, month, and day numbers.
  ## Month and day start with 1.
  func to_year_month_day(date: super::Date): {
    year: int32, month: uint8, day: uint8
  }
}
## Functions operating on [super::Timestamp].
module timestamp {
## Computes the local date of a timestamp at a timezone.
##
## This is the opposite direction of [std::date::to_timestamp].
func to_date(timestamp: super::Timestamp, time_zone: text): super::Date
}
## Exact decimal value with a fixed `SCALE`, which is the number of digits past
## the decimal point.
## Currently, `SCALE` is 2. This will be configurable in the future.
##
## Backed by a signed 64-bit integer.
## NOTE(review): assuming the integer stores the value multiplied by
## `10**SCALE`, it can hold magnitudes up to about `9.2 * 10**(18-SCALE)`,
## which is less than the previously documented bound of `10**(19-SCALE)`
## — confirm.
# TODO: implement const type param for SCALE.
type Decimal(int64)