lutra-compiler 0.5.1

# --- Utils ---

## Constructs the usual "default" value of a type.
##
## Takes no arguments: the result type `T` is determined by the context of the
## call (for example, `option::or_default` uses it as the fallback value).
##
## For `bool` it is false.
## For numbers, it is zero.
## For `text` it is an empty text.
## For tuples, it is a tuple with every field set to its default value.
## For arrays, it is an empty array.
## For enums, it is the first variant.
func default(): T
where T

# --- Casts ---

## Converts a number to `int8`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of values that do not fit into `int8` is not
# specified here - confirm.
func to_int8(x: T): int8
where T: number

## Converts a number to `int16`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of values that do not fit into `int16` is not
# specified here - confirm.
func to_int16(x: T): int16
where T: number

## Converts a number to `int32`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of values that do not fit into `int32` is not
# specified here - confirm.
func to_int32(x: T): int32
where T: number

## Converts a number to `int64`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of values that do not fit into `int64` is not
# specified here - confirm.
func to_int64(x: T): int64
where T: number

## Converts a number to `uint8`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of negative values and of values that do not fit
# into `uint8` is not specified here - confirm.
func to_uint8(x: T): uint8
where T: number

## Converts a number to `uint16`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of negative values and of values that do not fit
# into `uint16` is not specified here - confirm.
func to_uint16(x: T): uint16
where T: number

## Converts a number to `uint32`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of negative values and of values that do not fit
# into `uint32` is not specified here - confirm.
func to_uint32(x: T): uint32
where T: number

## Converts a number to `uint64`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): handling of negative values and of values that do not fit
# into `uint64` is not specified here - confirm.
func to_uint64(x: T): uint64
where T: number

## Converts a number to `float32`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): rounding of values that cannot be represented exactly as
# `float32` is not specified here - confirm.
func to_float32(x: T): float32
where T: number

## Converts a number to `float64`.
##
## The argument can be of any type that satisfies the `number` constraint.
# NOTE(review): rounding of values that cannot be represented exactly as
# `float64` is not specified here - confirm.
func to_float64(x: T): float64
where T: number

## Converts a value to `text`.
##
## The argument can be of any type that satisfies the `primitive` constraint.
# NOTE(review): the exact textual format (for example of floats) is not
# specified here - confirm.
func to_text(x: T): text
where T: primitive

# --- Operators ---

## Multiplies two numbers.
##
## Both operands must be of the same number type `T`, which is also the type
## of the result.
# NOTE(review): overflow behavior is not specified here - confirm.
func mul(left: T, right: T): T
where T: number

## Divides two numbers.
##
## Both operands must be of the same number type `T`, which is also the type
## of the result. `left` is the dividend and `right` is the divisor.
# NOTE(review): rounding of integer division and handling of a zero divisor
# are not specified here - confirm.
func div(left: T, right: T): T
where T: number

## Division remainder.
##
## Both operands must be of the same number type `T`, which is also the type
## of the result.
# NOTE(review): sign of the result for negative operands and handling of a
# zero divisor are not specified here - confirm.
func mod(left: T, right: T): T
where T: number

## Adds two numbers.
##
## Both operands must be of the same number type `T`, which is also the type
## of the result.
# NOTE(review): overflow behavior is not specified here - confirm.
func add(left: T, right: T): T
where T: number

## Subtracts two numbers.
##
## Both operands must be of the same number type `T`, which is also the type
## of the result. Used as `left - right` by [std::sub_timestamp].
# NOTE(review): overflow behavior is not specified here - confirm.
func sub(left: T, right: T): T
where T: number

## Negates a number.
##
## Defined only for signed integers and floats: unsigned integer types are
## not included in the type constraint.
func neg(x: T): T
where T: int8 | int16 | int32 | int64 | float32 | float64

## Compares two values to determine if the first is less than, equal to, or
## greater than the second.
##
## Both values must be of the same primitive type. The result is reported as
## an [std::Ordering]. Comparison functions such as [std::eq] and [std::lt]
## are defined in terms of this function.
func cmp(a: T, b: T): Ordering
where T: primitive

## Outcome of comparing two values, as returned by [std::cmp].
type Ordering: enum {less, equal, greater}

## Equality test. Used by `==` operator.
##
## Defined in terms of [std::cmp]: the result is `true` only when the
## comparison yields `.equal`.
func eq(left: T, right: T): bool
where T: primitive -> (
  match cmp(left, right) {
    .less => false,
    .equal => true,
    .greater => false,
  }
)

## Inequality test. Used by `!=` operator.
##
## Defined in terms of [std::cmp]: the result is `true` unless the
## comparison yields `.equal`.
func ne(left: T, right: T): bool
where T: primitive -> (
  match cmp(left, right) {
    .less => true,
    .equal => false,
    .greater => true,
  }
)

## "Greater than" test. Used by `>` operator.
##
## Defined in terms of [std::cmp]: the result is `true` only when comparing
## `left` to `right` yields `.greater`.
func gt(left: T, right: T): bool
where T: primitive -> (
  match cmp(left, right) {
    .less => false,
    .equal => false,
    .greater => true,
  }
)

## "Less than" test. Used by `<` operator.
##
## Defined in terms of [std::cmp]: the result is `true` only when comparing
## `left` to `right` yields `.less`.
func lt(left: T, right: T): bool
where T: primitive -> (
  match cmp(left, right) {
    .less => true,
    .equal => false,
    .greater => false,
  }
)

## "Greater than or equal" test. Used by `>=` operator.
##
## Defined in terms of [std::cmp]: the result is `true` unless comparing
## `left` to `right` yields `.less`.
func gte(left: T, right: T): bool
where T: primitive -> (
  match cmp(left, right) {
    .less => false,
    .equal => true,
    .greater => true,
  }
)

## "Less than or equal" test. Used by `<=` operator.
##
## Defined in terms of [std::cmp]: the result is `true` unless comparing
## `left` to `right` yields `.greater`.
func lte(left: T, right: T): bool
where T: primitive -> (
  match cmp(left, right) {
    .less => true,
    .equal => true,
    .greater => false,
  }
)

## Tests if both values are true. Used by `&&` operator.
# NOTE(review): whether `right` is evaluated when `left` is false
# (short-circuiting) is not specified here - confirm.
func and(left: bool, right: bool): bool

## Tests if either of the values is true. Used by `||` operator.
# NOTE(review): whether `right` is evaluated when `left` is true
# (short-circuiting) is not specified here - confirm.
func or(left: bool, right: bool): bool

## Inverts a boolean: `true` becomes `false` and vice versa.
## Used by `!` operator.
func not(expr: bool): bool

# --- Option functions ---
## Functions for working with options, which are enums of shape
## `enum {none, some: T}`.
module option {
  ## Tests if an option holds a value, i.e. if it is the `.some` variant.
  func is_some(value: enum {none, some: T}): bool
  where T
  -> (
    match value {
      .none => false,
      .some => true,
    }
  )

  ## Tests if an option is empty, i.e. if it is the `.none` variant.
  func is_none(value: enum {none, some: T}): bool
  where T
  -> (
    match value {
      .none => true,
      .some => false,
    }
  )

  ## Unwraps the value in `.some`.
  ## When value is `.none`, returns the `fallback` instead.
  func or_else(value: enum {none, some: T}, fallback: T): T
  where T
  -> (
    match value {
      .none => fallback,
      .some(inner) => inner,
    }
  )

  ## Unwraps the value in `.some`.
  ## When value is `.none`, returns the default value instead.
  ##
  ## Default value depends on the type. See [super::default].
  ## For numbers it is 0, for booleans it is false.
  func or_default(value: enum {none, some: T}): T
  where T
  -> (
    match value {
      .none => super::default(),
      .some(inner) => inner,
    }
  )

  ## Combines two options into an option of a tuple.
  ## The result is `.some({a, b})` when both `a` and `b` are `.some`,
  ## and `.none` when at least one of them is `.none`.
  # TODO: maybe merge this into std::zip?
  func zip(
    a: enum {none, some: A},
    b: enum {none, some: B},
  ): enum {none, some: {A, B}}
  where A, B
  -> (
    match a {
      .none => .none,
      .some(a) => match b {
        .some(b) => .some({a, b}),
        .none => .none,
      }
    }
  )

  ## Applies `mapper` to the value in `.some` and wraps the result back into
  ## `.some`. A `.none` is passed through, without calling `mapper`.
  # TODO: maybe merge this into std::map?
  func map(
    value: enum {none, some: I},
    mapper: func (I): O
  ): enum {none, some: O}
  where I, O
  -> (
    match value {
      .some(inner) => .some(mapper(inner)),
      .none => .none
    }
  )

  ## Applies `mapper`, which itself returns an option, to the value in `.some`
  ## and returns its result directly, without wrapping it into another option.
  ## A `.none` is passed through, without calling `mapper`.
  # TODO: maybe merge this into std::flat_map?
  func flat_map(
    value: enum {none, some: I},
    mapper: func (I): enum {none, some: O}
  ): enum {none, some: O}
  where I, O
  -> (
    match value {
      .some(inner) => mapper(inner),
      .none => .none
    }
  )

  ## Applies `mapper` to the value in `.some` and returns the result.
  ## When value is `.none`, returns `fallback` instead.
  func map_or(
    value: enum {none, some: I},
    fallback: O,
    mapper: func (I): O
  ): O
  where I, O
  -> (
    match value {
      .some(inner) => mapper(inner),
      .none => fallback
    }
  )
}

# --- Array functions ---

## Retrieves the item at a given position of an array.
##
## The result is an option: `.some` carrying the item, or `.none`.
# NOTE(review): presumably positions are zero-based (`find` takes `index(0)`
# of the matching items) and `.none` is returned for positions outside of the
# array - confirm.
func index(array: [T], position: int64): enum {none, some: T}
where T

## Returns the number of items in an array.
##
## An array for which this is 0 is considered empty by [std::is_empty].
func count(array: [T]): int64
where T

## Tests if an array has no items, i.e. if its [std::count] equals 0.
func is_empty(array: [T]): bool
where T
-> (array | count | eq(0))

## Transforms an array of `I` into an array of `O`, using a `mapper` function
## that converts a single item.
# NOTE(review): presumably the result has one item per input item, in the
# same order - confirm.
func map(array: [I], mapper: func (I): O): [O]
where I, O

## Transforms an array of `I` into an array of `O`, using a `mapper` function
## that converts a single item into an array of results.
# NOTE(review): presumably the arrays produced for each item are concatenated
# in the order of the input items - confirm.
func flat_map(array: [I], mapper: func (I): [O]): [O]
where I, O

## Selects the items of an array for which `condition` returns `true`.
# NOTE(review): presumably the relative order of retained items is
# preserved - confirm.
func filter(array: [T], condition: func (T): bool): [T]
where T

## Looks for an item that satisfies a `condition`.
##
## Items are narrowed down with [std::filter] and then the item at position 0
## of the matches is taken with [std::index], so the result is an option.
func find(array: [T], condition: func (T): bool): enum {none, some: T}
where T
-> index(filter(array, condition), 0)

## Extracts a contiguous part of an array, delimited by `start` and `end`
## positions.
# NOTE(review): presumably `start` is inclusive and `end` is exclusive, as in
# `sequence`; handling of negative or out-of-bounds positions is not
# specified here - confirm.
func slice(array: [T], start: int64, end: int64): [T]
where T

## Sorts array items by a key.
##
## The `key` function extracts the value to sort by from each item.
## It must return a primitive type.
# NOTE(review): presumably the order is ascending; stability for items with
# equal keys is not specified here - confirm.
func sort(array: [I], key: func (I): K): [I]
where I, K: primitive

## Converts an array of tuples (rows) into a tuple of arrays (columns).
##
## For each field `f` of type `F` in the row type `T`, the result contains
## a field `f` of type `[F]`. Inverse of [std::from_columnar].
func to_columnar(rows: [T]): {for f: F in T do f: [F]}
where T: {..}

## Converts a tuple of arrays (columns) into an array of tuples (rows).
##
## For each field `f` of type `F` in the row type `T`, the input must contain
## a field `f` of type `[F]`. Inverse of [std::to_columnar].
# NOTE(review): handling of columns of different lengths is not specified
# here - confirm.
func from_columnar(columnar: {for f: F in T do f: [F]}): [T]
where T: {..}

## Transforms an array of tuples by operating on its columnar form.
##
## The rows are converted into columns with [std::to_columnar], handed over
## to `mapper`, and the columns it returns are converted back into rows with
## [std::from_columnar].
func map_columnar(
  x: [I], mapper: func ({for f: F in I do f: [F]}): {for f: F in O do f: [F]}
): [O]
where I: {..}, O: {..}
-> from_columnar(mapper(to_columnar(x)))

## Computes a single value from an array of tuples, by operating on its
## columnar form.
##
## The rows are converted into columns with [std::to_columnar] and handed
## over to `mapper`, whose result is returned as-is.
func aggregate(x: [T], mapper: func ({for f: F in T do f: [F]}): O): O
where T: {..}, O
-> mapper(to_columnar(x))

## Combines two arrays into an array of tuples, each holding an item from
## `left` and an item from `right`.
# NOTE(review): presumably items are paired by position; handling of arrays
# of different lengths is not specified here - confirm.
func zip(left: [L], right: [R]): [{L, R}]
where L, R

## Groups array items by a key.
## Returns groups, which contain the key and an array of values in this group.
##
## The `get_key` function extracts the key from each item.
# NOTE(review): order of the groups and of the values within a group is not
# specified here - confirm.
func group(input: [I], get_key: func (I): K): [{key: K, values: [I]}]
where I, K

## Groups array items by a key and maps each group into an output value.
##
## Items are grouped with [std::group], using `get_key` to extract the key.
## Then `mapper` is called once per group, with the key and the array of
## values in that group. Returns the array of values produced by `mapper`.
func group_map(input: [I], get_key: func (I): K, mapper: func (K, [I]): O): [O]
where I, K, O
-> map(group(input, get_key), g -> mapper(g.key, g.values))

## Deduplicates array items.
##
## Items are grouped with [std::group], using the item itself as the key.
## The result contains the key of each group.
func distinct(array: [T]): [T]
where T
-> map(group(array, item -> item), g -> g.key)

## Combines two arrays of the same item type into one array.
# NOTE(review): presumably items of `top` come first, followed by items of
# `bottom` - confirm.
func append(top: [T], bottom: [T]): [T]
where T

## Folds every input into an accumulator value by applying an operation,
## returning the final result.
##
## The `operation` takes two arguments: accumulator and an input.
## It returns the value that the accumulator should have in next iteration.
##
## The `initial` value is the accumulator for the first call.
##
## After folding all inputs, the last accumulator is returned.
##
## This function is sometimes also called "reduce" or "inject".
## This function is similar to [std::scan], but it returns only the final
## accumulator.
# NOTE(review): presumably `initial` is returned when `inputs` is empty -
# confirm.
func fold(inputs: [I], initial: A, operation: func (A, I): A): A
where I, A

## Applies an operation to each input, using the output from the previous
## iteration.
##
## The `operation` takes two arguments: accumulator and an input.
## It returns the value that the accumulator should have in next iteration.
##
## The `initial` value is the accumulator for the first call.
##
## Returns values of all produced accumulators.
##
## This function is similar to [std::fold], but it returns all accumulators,
## instead of only the final one.
# NOTE(review): whether the `initial` value itself is included in the result
# is not specified here - confirm.
func scan(inputs: [I], initial: A, operation: func (A, I): A): [A]
where I, A

# func apply(
#   value: I,
#   function: func (I): O
# ): O
# where I, O -> function(value)

## Applies an operation to an array, until it returns an empty array.
## First operation is supplied with the initial array.
## Each following operation is supplied with result of previous operation.
##
## Returns all produced arrays concatenated together, including initial array.
##
## In SQL, this is known as "RECURSIVE CTE" or "recursive join".
# NOTE(review): an `operation` that never returns an empty array presumably
# makes this function run forever - confirm.
func apply_until_empty(initial: [T], operation: func ([T]): [T]): [T]
where T

## Returns an array of sequential integers
## from start (inclusive) to end (exclusive).
##
## Both bounds must be of the same integer type, which is also the type of
## the items in the result.
##
## For example: `sequence(0, 3) == [0, 1, 2]`
# NOTE(review): presumably the result is empty when `end` is not greater
# than `start` - confirm.
func sequence(start: N, end: N): [N]
where N: int8 | int16 | int32 | int64 | uint8 | uint16 | uint32 | uint64

# --- Aggregation functions ---

## Finds the minimum value in the array.
## Returns `.none` when array is empty.
##
## Items must be of a primitive type.
func min(array: [T]): enum {none, some: T}
where T: primitive

## Finds the maximum value in the array.
## Returns `.none` when array is empty.
##
## Items must be of a primitive type.
func max(array: [T]): enum {none, some: T}
where T: primitive

## Computes the sum of all numbers in an array.
## Returns zero when array is empty.
##
## The result is of the same number type as the items.
# NOTE(review): overflow behavior is not specified here - confirm.
func sum(column: [T]): T
where T: number

## Computes the arithmetic mean of an array of numbers.
##
## The result is always `float64`, regardless of the type of the items.
# NOTE(review): the result for an empty array is not specified here -
# confirm.
func mean(array: [T]): float64
where T: number

## Returns `true` if all items in the array are `true`.
# NOTE(review): presumably returns `true` for an empty array - confirm.
func all(values: [bool]): bool

## Returns `true` if any value in the array is `true`.
# NOTE(review): presumably returns `false` for an empty array - confirm.
func any(values: [bool]): bool

## Tests if the `haystack` array contains an item equal to `needle`.
##
## Items equal to `needle` (as per [std::eq]) are selected with
## [std::filter]. The result is `true` when that selection is not empty.
func contains(haystack: [T], needle: T): bool
where T: primitive -> (
  filter(haystack, item -> eq(item, needle)) | is_empty | not
)

# --- Window functions ---

## Shifts array items backwards by an offset.
## Items in front are set to their default value.
##
## The result has as many items as the input, as shown in the example:
## `lag(["a", "b", "c"], 1)` is `["", "a", "b"]`.
# NOTE(review): handling of negative offsets is not specified here - confirm.
func lag(array: [T], offset: int64): [T]
where T

## Shifts array items forwards by an offset.
## Items in back are set to their default value.
##
## The result has as many items as the input, as shown in the example:
## `lead(["a", "b", "c"], 1)` is `["b", "c", ""]`.
# NOTE(review): handling of negative offsets is not specified here - confirm.
func lead(array: [T], offset: int64): [T]
where T

## Computes rolling arithmetic mean over an array of numbers.
## Also known as moving average.
##
## Returns an array of same length as the input,
## where each item is the mean of the corresponding input item and
## a number of preceding and following items.
##
## For example, `rolling_mean(..., 1, 2)` computes mean of each item, along with
## 1 preceding and 2 following items, which is 4 items total.
# NOTE(review): presumably windows near the start and the end of the array
# contain fewer items - confirm.
func rolling_mean(array: [T], preceding: uint32, following: uint32): [float64]
where T: number

## Computes rank of each array item.
##
## Rank is the number of items that are less than current item, plus one.
## Equal items share the same rank and the ranks that follow are skipped.
##
## The values range from `1` to `n` (number of items).
## Also known as "min rank".
##
## For example, `rank(["a", "b", "b", "c"])` is `[1, 2, 2, 4]`.
func rank(array: [T]): [int32]
where T: primitive

## Computes dense rank of each array item.
##
## Dense rank is the number of unique items that are less than current item,
## plus one. Equal items share the same rank and no ranks are skipped.
##
## The values range from `1` to `n` (number of unique items).
##
## For example, `rank_dense(["a", "b", "b", "c"])` is `[1, 2, 2, 3]`.
func rank_dense(array: [T]): [int32]
where T: primitive

## Computes percentile rank of each array item.
##
## Percentile rank is the item rank (see [std::rank]) rescaled to range
## from 0.0 to 1.0. Equal items have equal percentile rank.
##
## For example, `rank_percentile(["a", "b", "b", "c"])` is
## `[0.0, 0.33333, 0.33333, 1.0]`.
# NOTE(review): the result for arrays with a single item is not specified
# here - confirm.
func rank_percentile(array: [T]): [float64]
where T: primitive

## Computes ECDF (Empirical Cumulative Distribution Function) of array items.
##
## Value of each item i is the number of items that are less than or equal to i,
## divided by the total number of items.
##
## The values range from `1.0/n` to `1.0`.
##
## For example, `cume_dist(["a", "b", "b", "c"])` is
## `[0.25, 0.75, 0.75, 1.0]`.
func cume_dist(array: [T]): [float64]
where T: primitive

## Mathematical functions and constants.
module math {
  ## Absolute value.
  ##
  ## The result is of the same number type as the argument.
  func abs(val: T): T
  where T: number

  # func sqrt(val: int64): int64

  # func ceil(val: int64): int64

  # func floor(val: int64): int64

  # func round(val: int64, n_digits: int64): int64

  ## The constant pi, as a `float32`.
  const pi32: float32 = 3.1415927

  ## The constant pi, as a `float64`.
  const pi64: float64 = 3.141592653589793

  ## Raises `val` to the power of `exponent`.
  ##
  ## Defined only for `int64` and `float64`. The value, the exponent and the
  ## result are all of the same type.
  # NOTE(review): handling of negative `int64` exponents and of overflow is
  # not specified here - confirm.
  # TODO: exponent should probably always be uint32
  func pow(val: T, exponent: T): T
  where T: int64 | float64

  # func exp(val: int64): int64
  # func ln(val: int64): int64
  # func log10(val: int64): int64
  # func log(base: int64, val: int64): int64
  # func degrees(val: int64): int64
  # func radians(val: int64): int64
  # func cos(val: int64): int64
  # func acos(val: int64): int64
  # func sin(val: int64): int64
  # func asin(val: int64): int64
  # func tan(val: int64): int64
  # func atan(val: int64): int64
}

## Text functions
module text {
  ## Joins two texts into one.
  # NOTE(review): presumably `left` is followed by `right` - confirm.
  func concat(left: text, right: text): text

  ## Returns the length of a text.
  # NOTE(review): the unit of length (bytes, code points or characters) is
  # not specified here - confirm.
  func length(value: text): uint32

  ## Converts ASCII code to its corresponding Unicode character.
  ## This function always succeeds, because ASCII is a subset of Unicode.
  # NOTE(review): the parameter is `uint8`, so codes 128-255, which are
  # outside of ASCII, can be passed in as well. Their handling is not
  # specified here - confirm.
  func from_ascii(ascii: uint8): text

  ## Joins parts together, placing separator between each pair.
  func join(parts: [text], separator: text): text

  ## Splits text into substrings at every occurrence of a separator.
  ##
  ## For example, `split("1_two_3", "_")` is `["1", "two", "3"]`.
  # NOTE(review): handling of an empty separator is not specified here -
  # confirm.
  func split(value: text, separator: text): [text]

  # func lower(value: text): text
  # func upper(value: text): text

  # func ltrim(value: text): text
  # func rtrim(value: text): text
  # func trim(value: text): text

  # func substring(value: text, offset: int64, length: int64): text
  # func replace(value: text, pattern: text, replacement: text): text

  ## Tests if a text starts with a prefix.
  func starts_with(value: text, prefix: text): bool

  ## Tests if a text contains a pattern.
  # NOTE(review): presumably `pattern` is matched literally, as a substring,
  # and not as a regular expression - confirm.
  func contains(value: text, pattern: text): bool

  ## Tests if a text ends with a suffix.
  func ends_with(value: text, suffix: text): bool

  # func regex_search(haystack: text, pattern: text): bool
}

## File system functions.
# NOTE(review): the descriptions in this module are inferred from function
# names and signatures only - confirm against the implementation.
module fs {
  ## Presumably reads rows from a Parquet file at path `file_name`.
  ## Rows are tuples of type `R`.
  func read_parquet(file_name: text): [R]
  where R: {..}

  ## Presumably writes rows into a Parquet file at path `file_name`.
  ## Rows are tuples of type `R`. Returns an empty tuple.
  func write_parquet(data: [R], file_name: text): {}
  where R: {..}
}

## Module available only when targeting SQL databases.
## Currently this includes only `sql-pg` format.
module sql {
  ## Reads rows from a table.
  ##
  ## The order of table rows is unknown and might change over repeated
  ## invocations.
  ##
  ## Table identifier must be a constant.
  ## It can contain slashes to denote table namespaces.
  ## Any additional slashes will remain in table name verbatim.
  ## For example:
  ## - `from("my_schema/invoices")` will read from `my_schema.invoices`,
  ## - `from("my_schema/hello/world")` will read from `my_schema."hello/world"`.
  func from(const table_identifier: text): [R]
  where R: {..}

  ## Inserts rows into a table. Returns an empty tuple.
  ##
  ## For table identifier format, see documentation of [std::sql::from].
  func insert(rows: [R], const table_identifier: text): {}
  where R: {..}

  ## Updates rows in a table. Returns an empty tuple.
  ##
  ## The `updater` function is applied to each row in the table. When it returns
  ## `.none` the row is not updated, and when it returns `.some(new_value)` it
  ## is updated to the new value.
  ##
  ## The function receives the current row values and can use them to decide
  ## whether to update and what the new values should be.
  ##
  ## Example:
  ## ```lt
  ## std::sql::update(
  ##   "users",
  ##   func (u: User) -> if u.age > 65 then (
  ##     .some({status = "senior", ..u})
  ##   ) else (
  ##     .none
  ##   )
  ## )
  ## ```
  ##
  ## For table identifier format, see documentation of [std::sql::from].
  func update(
    const table: text,
    updater: func (R): enum {none, some: R},
  ): {}
  where R: {..}

  ## Evaluates raw SQL.
  ##
  ## This function is an escape hatch for accessing SQL directly.
  ## It should be used as a last resort, because it circumvents type checking
  ## and can cause program panic on malformed SQL or incorrect resulting type.
  ##
  ## The SQL source must be a constant.
  ##
  ## Resulting type of the expression must match the "SQL representation"
  ## of the Lutra type:
  ## - primitives -> a single column,
  ## - tuples -> one column for each tuple field,
  ## - nested tuples -> unpacked into parent relation,
  ## - arrays -> columns of the inner object,
  ## - enums -> one tag column, followed by columns of each of the variants,
  ## - enums that are the option -> one nullable column.
  func raw(const sql_source: text): R
  where R
}

## An instant in time. Timestamp without a timezone.
##
## Backed by a signed 64-bit integer, indicating microseconds since the Unix
## epoch (1970-01-01T00:00:00.000 UTC), excluding leap seconds.
##
## Two timestamps can be subtracted with [std::sub_timestamp].
# TODO: support for timezone and other time units.
type Timestamp(microseconds: int64)

## A calendar date, stored as elapsed days since Unix Epoch (1970-01-01).
##
## Backed by a signed 32-bit integer, representing number of days.
##
## See module [std::date] for functions that operate on dates.
type Date(days_epoch: int32)

# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of seconds.
# type Time32Second(int32)

# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of milliseconds.
# type Time32Millisecond(int32)

# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 32-bit integer, representing number of microseconds.
# type Time32Microsecond(int32)

# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 64-bit integer, representing number of seconds.
# type Time64Second(int64)

# ## Length of time, unrelated to calendar events.
# ##
# ## Backed by a signed 64-bit integer, representing number of milliseconds.
# type Time64Millisecond(int64)

## Length of time, unrelated to calendar events.
## Can be interpreted as a duration or as an offset from midnight.
##
## Backed by a signed 64-bit integer, representing number of microseconds.
##
## Produced, for example, by [std::sub_timestamp] and [std::date::sub].
type Time(microseconds: int64)

# func to_time64_second(x: D): Time64Second
# where D: Time32Second -> x.0 | to_int64 | Time64Second
# where D: Time32Millisecond -> x.0 | to_int64 | mul(1000) | Time64Second
# where D: Time32Microsecond -> x.0 | to_int64 | mul(1000000) | Time64Second
# where D: Time64Second -> x
# where D: Time64Millisecond -> x.0 | mul(1000) | Time64Second
# where D: Time64Microsecond -> x.0 | mul(1000000) | Time64Second

# func to_time64_millisecond(x: D): Time64Millisecond
# where D: Time32Second -> x.0 | to_int64 | div(1000) | Time64Millisecond
# where D: Time32Millisecond -> x.0 | to_int64 | Time64Millisecond
# where D: Time32Microsecond -> x.0 | to_int64 | mul(1000) | Time64Millisecond
# where D: Time64Second -> x.0 | div(1000) | Time64Millisecond
# where D: Time64Millisecond -> x
# where D: Time64Microsecond -> x.0 | mul(1000) | Time64Millisecond

# func to_time64_microsecond(x: D): Time64Microsecond
# where D: Time32Second -> x.0 | to_int64 | div(1000000) | Time64Microsecond
# where D: Time32Millisecond -> x.0 | to_int64 | div(1000) | Time64Microsecond
# where D: Time32Microsecond -> x.0 | to_int64 | Time64Microsecond
# where D: Time64Second -> x.0 | div(1000000) | Time64Microsecond
# where D: Time64Millisecond -> x.0 | div(1000) | Time64Microsecond
# where D: Time64Microsecond -> x

## Adds two lengths of time, by adding their microsecond counts.
func add_time(a: Time, b: Time): Time -> (a.0 | add(b.0) | Time)

## Computes the length of time elapsed from timestamp `b` to timestamp `a`,
## by subtracting microsecond count of `b` from microsecond count of `a`.
func sub_timestamp(a: Timestamp, b: Timestamp): Time -> (a.0 | sub(b.0) | Time)

## Returns offset of a time zone from UTC on a given date.
##
## The offset is positive for time zones that are ahead (east) of UTC and
## negative for time zones that are behind UTC, following the usual
## "UTC+hh:mm" convention.
##
## It is computed from the midnight of date `d`. Local midnight in a time
## zone that is ahead of UTC is an earlier instant than midnight in UTC, so
## the local timestamp is subtracted from the UTC timestamp, and not the
## other way around.
# NOTE(review): on dates with a daylight saving transition, this presumably
# yields the offset that is in effect at local midnight - confirm.
func timezone_offset(time_zone: text, d: Date): Time -> (
  sub_timestamp(date::to_timestamp(d, "UTC"), date::to_timestamp(d, time_zone))
)

## Functions that operate on [std::Date].
module date {
  ## Computes the length of time from date `b` to date `a`.
  ##
  ## Dates are assumed to be in the same timezone.
  ## Duration is measured from one midnight to another: the difference in
  ## days is widened to `int64` and multiplied by 86400000000, which is the
  ## number of microseconds in a day.
  func sub(a: super::Date, b: super::Date): super::Time -> (
    a.days_epoch
    | super::sub(b.days_epoch)
    | super::to_int64
    | super::mul(86400000000)
    | super::Time
  )

  ## Computes the timestamp of a local date at a timezone.
  # NOTE(review): presumably this is the instant of local midnight at the
  # start of the date - confirm.
  func to_timestamp(date: super::Date, time_zone: text): super::Timestamp

  ## Splits a date into its year, month, and day numbers.
  ## Month and day start with 1.
  func to_year_month_day(date: super::Date): {
    year: int32, month: uint8, day: uint8
  }
}

## Functions that operate on [std::Timestamp].
module timestamp {
  ## Computes the local date of a timestamp at a timezone.
  ##
  ## This is the conversion in the opposite direction of
  ## [std::date::to_timestamp].
  func to_date(timestamp: super::Timestamp, time_zone: text): super::Date
}

## Exact decimal value with a fixed `SCALE`, which is the number of digits past
## the decimal point.
## Currently, `SCALE` is 2. This will be configurable in the future.
## Can hold values from approximately `-10**(19-SCALE)` to `10**(19-SCALE)`.
##
## Backed by a signed 64-bit integer.
# NOTE(review): the exact bounds are those of `int64` divided by
# `10**SCALE`, which is slightly less than `10**(19-SCALE)` - confirm that
# the documented range is meant as an approximation.
# TODO: implement const type param for SCALE.
type Decimal(int64)