lance-encoding 4.0.0

Encoders and decoders for the Lance file format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

syntax = "proto3";

package lance.table;

import "google/protobuf/any.proto";
import "google/protobuf/timestamp.proto";
import "file.proto";

/*

Format:

+----------------------------------------+
|       Encoded Column 0, Chunk 0        |
           ...
|       Encoded Column M, Chunk N - 1    |
|       Encoded Column M, Chunk N        |
|       Indices ...                      |
|       Chunk Position (M x N x 8)       |
|         Manifest (Optional)            |
|         Metadata                       |
| i64: metadata position                 |
| MAJOR_VERSION | MINOR_VERSION | "LANC" |
+----------------------------------------+
 */

// A universally unique identifier (UUID), encoded as 16 bytes.
message UUID {
  // The raw bytes of the UUID.
  bytes uuid = 1;
}

// Manifest is a global section shared between all the files.
//
// It describes one version (snapshot) of the dataset: its schema, its
// fragments, and the feature flags / configuration needed to read and write it.
message Manifest {
  // All fields of the dataset, including the nested fields.
  repeated lance.file.Field fields = 1;

  // Schema metadata.
  map<string, bytes> schema_metadata = 5;

  // Fragments of the dataset.
  repeated DataFragment fragments = 2;

  // Snapshot version number.
  uint64 version = 3;

  // The file position of the version auxiliary data.
  //  * It is not inheritable between versions.
  //  * It is not loaded by default during query.
  uint64 version_aux_data = 4;

  // Identifies the library, and the version of that library, that wrote a
  // manifest.
  message WriterVersion {
    // The name of the library that created this file.
    string library = 1;
    // The version of the library that created this file. Because we cannot assume
    // that the library is semantically versioned, this is a string. However, if it
    // is semantically versioned, it should be a valid semver string without any 'v'
    // prefix. For example: `2.0.0`, `2.0.0-rc.1`.
    //
    // For forward compatibility with older readers, when writing new manifests this
    // field should contain only the core version (major.minor.patch) without any
    // prerelease or build metadata. The prerelease/build info should be stored in
    // the separate prerelease and build_metadata fields instead.
    string version = 2;
    // Optional semver prerelease identifier.
    //
    // This field stores the prerelease portion of a semantic version separately
    // from the core version number. For example, if the full version is "2.0.0-rc.1",
    // the version field would contain "2.0.0" and prerelease would contain "rc.1".
    //
    // This separation ensures forward compatibility: older readers can parse the
    // clean version field without errors, while newer readers can reconstruct the
    // full semantic version by combining version, prerelease, and build_metadata.
    //
    // If absent, the version field is used as-is.
    optional string prerelease = 3;
    // Optional semver build metadata.
    //
    // This field stores the build metadata portion of a semantic version separately
    // from the core version number. For example, if the full version is
    // "2.0.0-rc.1+build.123", the version field would contain "2.0.0", prerelease
    // would contain "rc.1", and build_metadata would contain "build.123".
    //
    // If absent, no build metadata is present.
    optional string build_metadata = 4;
  }

  // The version of the writer that created this file.
  //
  // This information may be used to detect whether the file may have known bugs
  // associated with that writer.
  WriterVersion writer_version = 13;

  // If present, the file position of the index metadata.
  optional uint64 index_section = 6;

  // Creation timestamp of this version, in the UTC timezone.
  google.protobuf.Timestamp timestamp = 7;

  // Optional version tag.
  string tag = 8;

  // Feature flags for readers.
  //
  // A bitmap of flags that indicate which features are required to be able to
  // read the table. If a reader does not recognize a flag that is set, it
  // should not attempt to read the dataset.
  //
  // Known flags:
  // * 1: deletion files are present
  // * 2: row ids are stable and stored as part of the fragment metadata.
  // * 4: use v2 format (deprecated)
  // * 8: table config is present
  uint64 reader_feature_flags = 9;

  // Feature flags for writers.
  //
  // A bitmap of flags that indicate which features must be used when writing to the
  // dataset. If a writer does not recognize a flag that is set, it should not attempt to
  // write to the dataset.
  //
  // The flag identities are the same as for reader_feature_flags, but the values of
  // reader_feature_flags and writer_feature_flags are not required to be identical.
  uint64 writer_feature_flags = 10;

  // The highest fragment ID that has been used so far.
  //
  // This ID is not guaranteed to be present in the current version, but it may
  // have been used in previous versions.
  //
  // For a single fragment, will be zero. For no fragments, will be absent.
  optional uint32 max_fragment_id = 11;

  // Path to the transaction file, relative to `{root}/_transactions`. The file at that
  // location contains a wire-format serialized Transaction message representing the
  // transaction that created this version.
  //
  // This string field "transaction_file" may be empty if no transaction file was written.
  //
  // The path format is "{read_version}-{uuid}.txn" where {read_version} is the version of
  // the table the transaction read from (serialized to decimal with no padding digits),
  // and {uuid} is a hyphen-separated UUID.
  string transaction_file = 12;

  // The file position of the transaction content. None if the transaction is empty.
  // The transaction content begins with the transaction content length as a u32.
  // If the transaction proto message has a length of `len`, the message ends at `len` + 4.
  optional uint64 transaction_section = 21;

  // The next unused row id. If zero, then the table does not have any rows.
  //
  // This is only used if the "stable_row_ids" feature flag is set.
  uint64 next_row_id = 14;

  // Describes the file format, and the version of that format, used by the
  // data files of the dataset.
  message DataStorageFormat {
    // The format of the data files (e.g. "lance")
    string file_format = 1;
    // The max format version of the data files. The format of the version can vary by
    // file_format and is not required to follow semver.
    //
    // Every file in this version of the dataset has the same file_format version.
    string version = 2;
  }

  // The data storage format
  //
  // This specifies what format is used to store the data files.
  DataStorageFormat data_format = 15;

  // Table config.
  //
  // Keys with the prefix "lance." are reserved for the Lance library. Other
  // libraries may wish to similarly prefix their configuration keys
  // appropriately.
  map<string, string> config = 16;

  // Metadata associated with the table.
  //
  // This is a key-value map that can be used to store arbitrary metadata
  // associated with the table.
  //
  // This is different than configuration, which is used to tell libraries how
  // to read, write, or manage the table.
  //
  // This is different than schema metadata, which is used to describe the
  // data itself and is attached to the output schema of scans.
  map<string, string> table_metadata = 19;

  // Field number 17 (`blob_dataset_version`) was used for a secondary blob dataset.
  reserved 17;
  reserved "blob_dataset_version";

  // The base paths of data files.
  //
  // This is used to determine the base path of a data file. In common cases
  // data file paths are under the current dataset base path. But for shallow
  // cloning, importing files and other multi-tier storage cases, the actual
  // data files could be outside of the current dataset.
  //
  // This field is used together with the `base_id` field declared on
  // `DataFile`, `DeletionFile` and `IndexMetadata`.
  //
  // For example, if we have a dataset with base path `s3://bucket/dataset` and
  // a DataFile with base_id 0, we get the actual data file path by:
  //   base_paths[id = 0] + /data/ + file.path
  //
  // The key (a.k.a. index) starts from 0 and is increased by 1 for each new
  // base path.
  repeated BasePath base_paths = 18;

  // The branch of the dataset. None means main branch.
  optional string branch = 20;
} // Manifest

// A base path that files of the dataset may be stored under, possibly outside
// of the dataset's own base path (see `Manifest.base_paths`).
message BasePath {
  // Identifier of this base path. `base_id` fields elsewhere in this file use
  // it as the key into `Manifest.base_paths`.
  uint32 id = 1;
  // An optional alias name for the base path.
  // When we use shallow clone and the target version is a tag, the tag name will be set here.
  optional string name = 2;
  // Flag indicating whether this path is a dataset root path or file directory:
  // - true:  Path is a dataset root (actual files under subdirectories like `data`, '_deletions')
  // - false: Path is a direct file directory (scenario like importing files)
  bool is_dataset_root = 3;
  // The absolute path.
  // Note: This absolute path will be directly used by Path:parse().
  string path = 4;
}

// Auxiliary data attached to a version.
// Only loaded on demand.
message VersionAuxData {
  // Key-value metadata.
  map<string, bytes> metadata = 3;
}

// Metadata describing an index.
message IndexMetadata {
  // Unique ID of an index. It is unique across all the dataset versions.
  UUID uuid = 1;

  // The columns the index is built on. These refer to file.Field.id.
  repeated int32 fields = 2;

  // Index name. Must be unique within one dataset version.
  string name = 3;

  // The version of the dataset this index was built from.
  uint64 dataset_version = 4;

  // A bitmap of the included fragment ids.
  //
  // This may be used to determine how much of the dataset is covered by the
  // index. This information can be retrieved from the dataset by looking at
  // the dataset at `dataset_version`. However, since the old version may be
  // deleted while the index is still in use, this information is also stored
  // in the index.
  //
  // The bitmap is stored as a 32-bit Roaring bitmap.
  bytes fragment_bitmap = 5;

  // Details, specific to the index type, which are needed to load / interpret the index.
  //
  // Indices should avoid putting large amounts of information in this field, as it will
  // bloat the manifest.
  //
  // Indexes are plugins, and so the format of the details message is flexible and not fully
  // defined by the table format.  However, there are some conventions that should be followed:
  //
  // - When Lance APIs refer to indexes they will use the type URL of the index details as the
  //   identifier for the index type.  If a user provides a simple string identifier like
  //   "btree" then it will be converted to "/lance.table.BTreeIndexDetails"
  // - Type URL comparisons are case-insensitive.  Therefore an index must have a unique type
  //   URL ignoring case.
  google.protobuf.Any index_details = 6;

  // The minimum lance version that this index is compatible with.
  optional int32 index_version = 7;

  // Timestamp when the index was created (UTC timestamp in milliseconds since epoch).
  //
  // This field is optional for backward compatibility. For existing indices created before
  // this field was added, this will be None/null.
  optional uint64 created_at = 8;

  // The base path index of the data file. Used when the file is imported or referred from another dataset.
  // Lance uses it as the key of the base_paths field in Manifest to determine the actual base path of the data file.
  optional uint32 base_id = 9;

  // List of files and their sizes for this index segment.
  // This enables skipping HEAD calls when opening indices and allows reporting
  // of index sizes without extra IO.
  // If this is empty, the index file sizes are unknown.
  repeated IndexFile files = 10;
}

// Metadata about a single file within an index segment.
message IndexFile {
  // Path of the file, relative to the index directory
  // (e.g., "index.idx", "auxiliary.idx").
  string path = 1;
  // Size of the file, in bytes.
  uint64 size_bytes = 2;
}

// Index section, containing a list of index metadata for one dataset version.
message IndexSection {
  // One entry per index in the dataset version.
  repeated IndexMetadata indices = 1;
}

// A DataFragment is a set of files which represent the different columns of the same
// rows. If a column exists in the schema of a dataset, but the file for that column does
// not exist within a DataFragment of that dataset, that column consists entirely of
// nulls.
message DataFragment {
  // The ID of a DataFragment is unique within a dataset.
  uint64 id = 1;

  // The data files that make up this fragment.
  repeated DataFile files = 2;

  // File that indicates which rows, if any, should be considered deleted.
  DeletionFile deletion_file = 3;

  // TODO: What's the simplest way we can allow an inline tombstone bitmap?

  // A serialized RowIdSequence message (see rowids.proto).
  //
  // These are the row ids for the fragment, in order of the rows as they appear.
  // That is, if a fragment has 3 rows, and the row ids are [1, 42, 3], then the
  // first row is row 1, the second row is row 42, and the third row is row 3.
  oneof row_id_sequence {
    // If small (< 200KB), the row ids are stored inline.
    bytes inline_row_ids = 5;
    // Otherwise, stored as part of a file.
    ExternalFile external_row_ids = 6;
  } // row_id_sequence

  // The "last updated at" versions of the rows in the fragment, stored either
  // inline or in an external file.
  oneof last_updated_at_version_sequence {
    // If small (< 200KB), the row latest updated versions are stored inline.
    bytes inline_last_updated_at_versions = 7;
    // Otherwise, stored as part of a file.
    ExternalFile external_last_updated_at_versions = 8;
  } // last_updated_at_version_sequence

  // The "created at" versions of the rows in the fragment, stored either
  // inline or in an external file.
  oneof created_at_version_sequence {
    // If small (< 200KB), the row created at versions are stored inline.
    bytes inline_created_at_versions = 9;
    // Otherwise, stored as part of a file.
    ExternalFile external_created_at_versions = 10;
  } // created_at_version_sequence

  // Number of original rows in the fragment, this includes rows that are now marked with
  // deletion tombstones. To compute the current number of rows, subtract
  // `deletion_file.num_deleted_rows` from this value.
  uint64 physical_rows = 4;
}

// A single data file of a DataFragment, together with the ids of the
// fields/columns that it stores.
message DataFile {
  // Path to the root relative to the dataset's URI.
  string path = 1;
  // The ids of the fields/columns in this file.
  //
  // When a DataFile object is created in memory, every value in fields is assigned -1 by
  // default. An object with a value in fields of -1 must not be stored to disk. -2 is
  // used for "tombstoned", meaning a field that is no longer in use. This is often
  // because the original field id was reassigned to a different data file.
  //
  // In Lance v1 IDs are assigned based on position in the file, offset by the max
  // existing field id in the table (if any already). So when a fragment is first created
  // with one file of N columns, the field ids will be 1, 2, ..., N. If a second fragment
  // is created with M columns, the field ids will be N+1, N+2, ..., N+M.
  //
  // In Lance v1 there is one field for each field in the input schema, this includes
  // nested fields (both struct and list).  Fixed size list fields have only a single
  // field id (these are not considered nested fields in Lance v1).
  //
  // This allows column indices to be calculated from field IDs and the input schema.
  //
  // In Lance v2 the field IDs generally follow the same pattern but there is no
  // way to calculate the column index from the field ID.  This is because a given
  // field could be encoded in many different ways, some of which occupy a different
  // number of columns.  For example, a struct field could be encoded into N + 1 columns
  // or it could be encoded into a single packed column.  To determine column indices
  // the column_indices property should be used instead.
  //
  // In Lance v1 these ids must be sorted but might not always be contiguous.
  repeated int32 fields = 2;
  // The top-level column indices for each field in the file.
  //
  // If the data file is version 1 then this property will be empty.
  //
  // Otherwise there must be one entry for each field in `fields`.
  //
  // Some fields may not correspond to a top-level column in the file.  In these cases
  // the index will be -1.
  //
  // For example, consider the schema:
  //
  // - dimension: packed-struct (0):
  //   - x: u32 (1)
  //   - y: u32 (2)
  // - path: `list<u32>` (3)
  // - embedding: `fsl<768>` (4)
  //   - fp64
  // - borders: `fsl<4>` (5)
  //   - simple-struct (6)
  //     - margin: fp64 (7)
  //     - padding: fp64 (8)
  //
  // One possible column indices array could be:
  // [0, -1, -1, 1, 3, 4, 5, 6, 7]
  //
  // This reflects quite a few phenomena:
  // - The packed struct is encoded into a single column and there is no top-level column
  //   for the x or y fields
  // - The variable sized list is encoded into two columns
  // - The embedding is encoded into a single column (common for FSL of primitive) and there
  //   is no "FSL column"
  // - The borders field actually does have an "FSL column"
  //
  // The column indices table may not have duplicates (other than -1).
  repeated int32 column_indices = 3;
  // The major file version used to create the file.
  uint32 file_major_version = 4;
  // The minor file version used to create the file.
  //
  // If both `file_major_version` and `file_minor_version` are set to 0,
  // then this is a version 0.1 or version 0.2 file.
  uint32 file_minor_version = 5;

  // The known size of the file on disk in bytes.
  //
  // This is used to quickly find the footer of the file.
  //
  // When this is zero, it should be interpreted as "unknown".
  uint64 file_size_bytes = 6;

  // The base path index of the data file. Used when the file is imported or referred from another dataset.
  // Lance uses it as the key of the base_paths field in Manifest to determine the actual base path of the data file.
  optional uint32 base_id = 7;
} // DataFile

// Deletion File
//
// Identifies the file that records which rows of a fragment are deleted.
//
// The path of the deletion file is constructed as:
//   {root}/_deletions/{fragment_id}-{read_version}-{id}.{extension}
// where {extension} depends on DeletionFileType.
message DeletionFile {
  // Type of deletion file, intended as a way to increase efficiency of the storage of deleted row
  // offsets. If there are sparsely deleted rows, then ARROW_ARRAY is the most efficient. If there
  // are densely deleted rows, then BITMAP is the most efficient.
  enum DeletionFileType {
    // A single Int32Array of deleted row offsets, stored as an Arrow IPC file with one batch and
    // one column. Has a .arrow extension.
    ARROW_ARRAY = 0;
    // A Roaring Bitmap of deleted row offsets. Has a .bin extension.
    BITMAP = 1;
  }

  // Type of deletion file.
  DeletionFileType file_type = 1;
  // The version of the dataset this deletion file was built from.
  uint64 read_version = 2;
  // An opaque id used to differentiate this file from others written by concurrent
  // writers.
  uint64 id = 3;
  // The number of rows that are marked as deleted.
  uint64 num_deleted_rows = 4;
  // The base path index of the deletion file. Used when the file is imported or referred from another
  // dataset. Lance uses it as the key of the base_paths field in Manifest to determine the actual base
  // path of the deletion file.
  optional uint32 base_id = 7;
} // DeletionFile

// A reference to a range of bytes stored in a file.
message ExternalFile {
  // Path to the file, relative to the root of the table.
  string path = 1;
  // The byte offset in the file where the data starts.
  uint64 offset = 2;
  // The size of the data in the file, in bytes.
  uint64 size = 3;
}

// Empty details message for older indexes that don't take advantage of the
// details field.
message VectorIndexDetails {}

// Index details whose content is a list of `Version` entries, each holding
// summarized rewrite groups. The content is stored either inline or in an
// external file.
message FragmentReuseIndexDetails {

  oneof content {
    // If < 200KB, the content is stored inline; otherwise the InlineContent
    // bytes are stored in an external file.
    InlineContent inline = 1;
    ExternalFile external = 2;
  }

  // The content of the details: a list of version entries.
  message InlineContent {
    repeated Version versions = 1;
  }

  // Summary of a fragment: its id and row counts.
  message FragmentDigest {
    // The fragment id.
    uint64 id = 1;

    // Number of physical rows in the fragment.
    uint64 physical_rows = 2;

    // Number of deleted rows in the fragment.
    uint64 num_deleted_rows = 3;
  }

  // A summarized version of the RewriteGroup information in a Rewrite transaction.
  message Group {
    // A roaring treemap of the changed row addresses.
    // When combined with the old fragment IDs and new fragment IDs,
    // it can recover the full mapping of old row addresses to either new row addresses or deleted.
    // This mapping can then be used to remap indexes or satisfy index queries for the new unindexed fragments.
    bytes changed_row_addrs = 1;

    // Digests of the fragments before the rewrite.
    repeated FragmentDigest old_fragments = 2;

    // Digests of the fragments after the rewrite.
    repeated FragmentDigest new_fragments = 3;
  }

  // The groups recorded for one dataset version.
  message Version {
    // The dataset_version at the time the index adds this version entry.
    uint64 dataset_version = 1;

    // The summarized rewrite groups of this version entry.
    repeated Group groups = 3;
  }
}

// ============================================================================
// MemWAL Index Types
// ============================================================================

// Region manifest containing epoch-based fencing and WAL state.
// Each region has exactly one active writer at any time.
message RegionManifest {
  // Region identifier (UUID v4).
  UUID region_id = 11;

  // Manifest version number.
  // Matches the version encoded in the filename.
  uint64 version = 1;

  // Region spec ID this region was created with.
  // Set at region creation and immutable thereafter.
  // A value of 0 indicates a manually-created region not governed by any spec.
  uint32 region_spec_id = 10;

  // Writer fencing token - monotonically increasing.
  // A writer must increment this when claiming the region.
  uint64 writer_epoch = 2;

  // The most recent WAL entry position (0-based) that has been flushed to a MemTable.
  // During recovery, replay starts from replay_after_wal_entry_position + 1.
  uint64 replay_after_wal_entry_position = 3;

  // The most recent WAL entry position (0-based) at the time manifest was updated.
  // This is a hint, not authoritative - recovery must list files to find actual state.
  uint64 wal_entry_position_last_seen = 4;

  // Next generation ID to create (incremented after each MemTable flush).
  uint64 current_generation = 6;

  // Field 7 removed: merged_generation moved to MemWalIndexDetails.merged_generations
  // which is the authoritative source for merge progress.
  //
  // The number and name are reserved so that they cannot be reused with a
  // different meaning, which would silently misinterpret previously written
  // manifests.
  reserved 7;
  reserved "merged_generation";

  // List of flushed MemTable generations and their directory paths.
  repeated FlushedGeneration flushed_generations = 8;
}

// A flushed MemTable generation and its storage location.
message FlushedGeneration {
  // Generation number.
  uint64 generation = 1;

  // Directory name of the flushed generation, relative to the region directory.
  string path = 2;
}

// A (region, generation) pair, used in MemWalIndexDetails to record a
// region's merged generation.
message MergedGeneration {
  // Region identifier (UUID v4).
  UUID region_id = 1;

  // Last generation merged to base table for this region.
  uint64 generation = 2;
}

// Tracks which merged generation a base table index has been rebuilt to cover.
// Used to determine whether to read from flushed MemTable indexes or from the
// base table.
message IndexCatchupProgress {
  // Name of the base table index (must match an entry in maintained_indexes).
  string index_name = 1;

  // Per-region progress: the generation up to which this index covers.
  // If a region is not present, the index is assumed to be fully caught up
  // (i.e., caught_up_generation >= merged_generation for that region).
  repeated MergedGeneration caught_up_generations = 2;
}

// Index details for MemWAL Index, stored in IndexMetadata.index_details.
// This is the centralized structure for all MemWAL metadata:
// - Configuration (region specs, indexes to maintain)
// - Merge progress (merged generations per region)
// - Region state snapshots
//
// Writers read this index to get configuration before writing.
// Readers read this index to discover regions and their state.
// A background process updates the index periodically to keep region snapshots current.
//
// Region snapshots are stored as a Lance file with one row per region.
// The schema has one column per RegionManifest field, with region fields as columns:
//   region_id: fixed_size_binary(16)  -- UUID bytes
//   version: uint64
//   region_spec_id: uint32
//   writer_epoch: uint64
//   replay_after_wal_entry_position: uint64
//   wal_entry_position_last_seen: uint64
//   current_generation: uint64
//   merged_generation: uint64
//   flushed_generations: list<struct<generation: uint64, path: string>>
//
// NOTE(review): `merged_generation` is listed above, but RegionManifest no
// longer declares that field (its field 7 was removed in favour of
// `merged_generations` below) -- confirm whether the snapshot schema still
// contains this column.
message MemWalIndexDetails {
  // Snapshot timestamp (Unix timestamp in milliseconds).
  int64 snapshot_ts_millis = 1;

  // Number of regions in the snapshot.
  // Used to determine storage format without reading the snapshot data.
  uint32 num_regions = 2;

  // Inline region snapshots for small region counts.
  // When num_regions <= threshold (implementation-defined, e.g., 100),
  // snapshots are stored inline as serialized bytes.
  // Format: Lance file bytes with the region snapshot schema.
  optional bytes inline_snapshots = 3;

  // Region specs defining how to derive region identifiers.
  // This configuration determines how rows are partitioned into regions.
  repeated RegionSpec region_specs = 7;

  // Indexes from the base table to maintain in MemTables.
  // These are index names referencing indexes defined on the base table.
  // The primary key btree index is always maintained implicitly and
  // should not be listed here.
  //
  // For vector indexes, MemTables inherit quantization parameters (PQ codebook,
  // SQ params) from the base table index to ensure distance comparability.
  repeated string maintained_indexes = 8;

  // Last generation merged to base table for each region.
  // This is updated atomically with merge-insert data commits, enabling
  // conflict resolution when multiple mergers operate concurrently.
  //
  // Note: This is separate from region snapshots because:
  // 1. merged_generations is updated by mergers (atomic with data commit)
  // 2. region snapshots are updated by background index builder
  repeated MergedGeneration merged_generations = 9;

  // Per-index catchup progress tracking.
  // When data is merged to the base table, base table indexes are rebuilt
  // asynchronously. This field tracks which generation each index covers.
  //
  // For indexed queries, if an index's caught_up_generation < merged_generation,
  // readers should use flushed MemTable indexes for the gap instead of
  // scanning unindexed data in the base table.
  //
  // If an index is not present in this list, it is assumed to be fully caught up.
  repeated IndexCatchupProgress index_catchup = 10;
}

// Region spec definition: an identified list of region fields.
message RegionSpec {
  // Unique identifier for this spec within the index.
  // IDs are never reused.
  uint32 spec_id = 1;

  // Region field definitions that determine how to compute region identifiers.
  repeated RegionField fields = 2;
}

// Region field definition: describes how one region value is computed from
// source columns, either by a well-known transform or by a custom expression.
message RegionField {
  // Unique string identifier for this region field.
  string field_id = 1;

  // Field IDs referencing source columns in the schema.
  repeated int32 source_ids = 2;

  // Well-known region transform name (e.g., "identity", "year", "bucket").
  // Mutually exclusive with expression.
  // NOTE(review): the exclusivity is stated by comment only; the two fields
  // are not declared in a oneof, so the schema itself does not enforce it.
  optional string transform = 3;

  // DataFusion SQL expression for custom logic.
  // Mutually exclusive with transform.
  optional string expression = 4;

  // Output type of the region value (Arrow type name).
  string result_type = 5;

  // Transform parameters (e.g., num_buckets for bucket transform).
  map<string, string> parameters = 6;
}