lance_encoding/constants.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Constants for Lance encoding metadata keys
5//!
6//! These constants define the metadata keys used in Arrow field metadata
7//! to configure various encoding behaviors in Lance.
8
// Metadata keys that control compression
/// Field metadata key that selects the compression scheme.
///
/// Example values: `"lz4"`, `"zstd"`, `"none"`.
pub const COMPRESSION_META_KEY: &str = "lance-encoding:compression";
/// Field metadata key that sets the compression level.
///
/// Only relevant for compression schemes that support levels.
pub const COMPRESSION_LEVEL_META_KEY: &str = "lance-encoding:compression-level";
/// Field metadata key that sets the RLE (run-length encoding) threshold.
pub const RLE_THRESHOLD_META_KEY: &str = "lance-encoding:rle-threshold";
/// Field metadata key that sets the minichunk size.
pub const MINICHUNK_SIZE_META_KEY: &str = "lance-encoding:minichunk-size";
18
// Metadata keys that control dictionary encoding
/// Field metadata key that sets the dictionary encoding threshold divisor.
///
/// - A large value discourages dictionary encoding.
/// - A small value encourages dictionary encoding.
pub const DICT_DIVISOR_META_KEY: &str = "lance-encoding:dict-divisor";
/// Field metadata key that sets the dictionary encoding size ratio threshold.
///
/// The ratio lies in the interval `(0.0, 1.0]`. Dictionary encoding is used when
/// `estimated_dict_size / raw_size < ratio`. For example, `0.8` means the
/// dictionary is used if the encoded size is below 80% of the raw size.
///
/// Default: `0.8`.
pub const DICT_SIZE_RATIO_META_KEY: &str = "lance-encoding:dict-size-ratio";
/// Field metadata key that selects the general compression scheme applied to
/// dictionary values.
///
/// Valid values: `"lz4"`, `"zstd"`, `"none"`.
pub const DICT_VALUES_COMPRESSION_META_KEY: &str = "lance-encoding:dict-values-compression";
/// Field metadata key that selects the compression level applied to dictionary
/// values.
///
/// Only relevant for schemes that support levels (e.g. zstd).
pub const DICT_VALUES_COMPRESSION_LEVEL_META_KEY: &str =
    "lance-encoding:dict-values-compression-level";
36
// Environment variables for dictionary values compression
/// Name of the environment variable that selects the general compression scheme
/// applied to dictionary values.
pub const DICT_VALUES_COMPRESSION_ENV_VAR: &str = "LANCE_ENCODING_DICT_VALUES_COMPRESSION";
/// Name of the environment variable that selects the compression level applied
/// to dictionary values.
pub const DICT_VALUES_COMPRESSION_LEVEL_ENV_VAR: &str =
    "LANCE_ENCODING_DICT_VALUES_COMPRESSION_LEVEL";
42
43// NOTE: BLOB_META_KEY is defined in lance-core to avoid circular dependency
44
// Metadata keys that control packed struct encoding
/// Deprecated, legacy field metadata key for packed struct encoding.
pub const PACKED_STRUCT_LEGACY_META_KEY: &str = "packed";
/// Field metadata key for packed struct encoding.
pub const PACKED_STRUCT_META_KEY: &str = "lance-encoding:packed";
50
// Metadata key and values that control structural encoding
/// Field metadata key that selects the structural encoding type.
pub const STRUCTURAL_ENCODING_META_KEY: &str = "lance-encoding:structural-encoding";
/// Structural encoding value that denotes miniblock encoding.
pub const STRUCTURAL_ENCODING_MINIBLOCK: &str = "miniblock";
/// Structural encoding value that denotes fullzip encoding.
pub const STRUCTURAL_ENCODING_FULLZIP: &str = "fullzip";
58
// Metadata key and default for byte stream split (BSS) encoding
/// Field metadata key that configures byte stream split encoding.
pub const BSS_META_KEY: &str = "lance-encoding:bss";
/// BSS mode used by default.
pub const DEFAULT_BSS_MODE: &str = "auto";