# infino 0.1.0

# A fast retrieval engine that stores data on object storage and runs SQL, full-text search, and vector search over it from a single system — search-on-Parquet.
# Documentation
# infino default configuration.
#
# This file is compiled into the binary via include_str! and acts as
# the floor of the config stack. Override at runtime via (in order of
# increasing precedence):
#
#   1. /etc/infino/config.yaml             (system-wide)
#   2. $XDG_CONFIG_HOME/infino/config.yaml (user; falls back to
#                                            $HOME/.config/infino/...)
#   3. ./infino.yaml                       (per-project / per-cwd)
#   4. INFINO_<UPPERCASE_FIELD>            (per-process env var)
#
# Each layer is a partial override — keys not set in a higher layer
# fall through to lower layers.
#
# Nested keys are addressed with `__` (double underscore) in env vars.
# Examples below.

# Supertable runtime knobs. Reader fan-out (skip + per-superfile
# search + top-k merge) and writer commit-time rayon-shard run on
# separate pools so a long-running commit can't spike reader p99 or
# vice versa.
#
# Thread-count values:
#   - `auto` — resolves at runtime to num_cpus for reader_threads
#     and max(1, num_cpus / 2) for writer_threads. Hardware-portable
#     default; the same shipped config works on a 4-core dev laptop
#     and a 96-core production node.
#   - a positive integer — explicit override. Use when you've
#     measured a workload-specific tuning.
#
# Env overrides (note the double underscore for nesting):
#   INFINO_SUPERTABLE__READER_THREADS=8
#   INFINO_SUPERTABLE__WRITER_THREADS=4
#   INFINO_SUPERTABLE__ID_COLUMN=row_id
#   INFINO_SUPERTABLE__COMMIT_THRESHOLD_SIZE_MB=2048
#   INFINO_SUPERTABLE__VERIFY_CRC_ON_OPEN=false
supertable:
  # Sizes of the reader and writer thread pools: either `auto` or a
  # positive integer.
  reader_threads: 'auto'
  writer_threads: 'auto'

  # System-managed primary-key column that the supertable injects on
  # every append(). Only the name can be configured; the supertable
  # layer fixes the column type. The leading underscore marks the
  # field as system-owned. Rename it only if `_id` clashes with a
  # business field name.
  id_column: '_id'

  # Size threshold, in mebibytes (1 MiB = 1024 × 1024 bytes), above
  # which the supertable's writer triggers an internal commit() that
  # flushes the in-memory buffer to disk-equivalent storage (one
  # superfile per writer-pool thread). 0 disables auto-flush, so only
  # caller-driven commit() produces superfiles.
  commit_threshold_size_mb: 1024

  # Whether every SuperfileReader::open verifies the trailing
  # whole-blob CRC and the per-subsection CRCs. Defaults to true.
  # Set to false only when the underlying storage already validates
  # checksums (content-addressed object store, ZFS, etc.): skipping
  # the scan then relies on that storage-layer guarantee in exchange
  # for faster cold opens.
  verify_crc_on_open: true

# Persistent storage wiring. Defaults to in-memory-only. Set
# `backend: s3` plus `bucket` / `prefix` to make
# SupertableOptions::apply_config attach an S3StorageProvider.
# `backend: azure` attaches an AzureStorageProvider, with `bucket`
# naming the Azure container.
#
# Env overrides:
#   INFINO_STORAGE__BACKEND=s3
#   INFINO_STORAGE__BUCKET=my-bucket
#   INFINO_STORAGE__PREFIX=tables/my-table
#   INFINO_STORAGE__DISK_CACHE_ROOT=/mnt/nvme/infino-cache
storage:
  # Quoted so the word is unmistakably the string `none` and is never
  # read as a YAML null.
  backend: 'none'
  # Unset values are written as an explicit `null` rather than a bare
  # `key:` so the empty value is visibly intentional.
  # NOTE(review): `local_root` is not described in this file —
  # presumably the root directory for a local-filesystem backend;
  # confirm.
  local_root: null
  # S3 bucket name; for `backend: azure` this names the Azure container.
  bucket: null
  prefix: ""

  # When set, storage-backed reads use DiskCacheStore. For S3,
  # lazy_foreground_with_background_fill is the object-store-native
  # cold path: foreground opens/searches with exact range GETs (or zero
  # superfile GETs at open when manifest open-batch bytes are present)
  # while background fill promotes the full superfile to mmap.
  disk_cache_root: null
  # 10 GiB (10 × 1024³ bytes).
  # NOTE(review): presumably the byte budget for the DiskCacheStore
  # contents under disk_cache_root — confirm.
  disk_budget_bytes: 10737418240
  # Byte budget for the content-addressed manifest-part cache, held in a
  # manifest-parts/ subdirectory of disk_cache_root. On a hit the part
  # loader reads bytes from local disk instead of object storage; parts
  # are content-addressed, so cached files are never stale and survive
  # restarts. Independent of disk_budget_bytes. Default 2 GiB.
  manifest_disk_budget_bytes: 2147483648
  cold_fetch_mode: lazy_foreground_with_background_fill
  # NOTE(review): cold_fetch_streams and the mmap_* knobs are not
  # described in this file; document their semantics once confirmed.
  cold_fetch_streams: 8
  # 4 MiB (4 × 1024² bytes).
  cold_fetch_chunk_bytes: 4194304
  mmap_cold_threshold_secs: 300
  mmap_sweep_interval_secs: 75

# Compaction merges the small superfiles produced by individual
# commits into one target-sized superfile, cutting query fan-out.
# Sizes are in MiB (1 MiB = 1024 × 1024 bytes).
#
# Env overrides (note the double underscore for nesting):
#   INFINO_COMPACTION__TARGET_SUPERFILE_SIZE_MB=2048
#   INFINO_COMPACTION__MIN_FILL_PERCENT=80
#   INFINO_COMPACTION__MAX_MEMORY_MB=3072
compaction:
  # Target size, in MiB, of a compacted output superfile.
  target_superfile_size_mb: 1024

  # Merge trigger: the minimum estimated live bytes, expressed as a
  # percentage of `target_superfile_size_mb`. Example: at 80% with a
  # 1 GiB target, two 200 MiB superfiles (400 MiB total) do not
  # compact.
  min_fill_percent: 80

  # Upper bound, in MiB, on the memory used to materialize the inputs
  # of a single merge.
  max_memory_mb: 3072