// lance_table/feature_flags.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Feature flags
5
6use snafu::location;
7
8use crate::format::Manifest;
9use lance_core::{Error, Result};
10
// Each flag occupies a single bit so that flags can be combined with `|`
// and tested with `&`.

/// Fragments may contain deletion files, which record the tombstones of
/// soft-deleted rows.
pub const FLAG_DELETION_FILES: u64 = 1;
/// Row ids are stable for both moves and updates. Fragments contain an index
/// mapping row ids to row addresses.
pub const FLAG_STABLE_ROW_IDS: u64 = 2;
/// Files are written with the new v2 format (this flag is no longer used).
pub const FLAG_USE_V2_FORMAT_DEPRECATED: u64 = 4;
/// Table config is present.
pub const FLAG_TABLE_CONFIG: u64 = 8;
/// Dataset uses multiple base paths (for shallow clones or multi-base datasets).
pub const FLAG_BASE_PATHS: u64 = 16;
/// Disable writing the transaction file under `_transaction/`. This flag is
/// set when we only want to write the inline transaction in the manifest.
pub const FLAG_DISABLE_TRANSACTION_FILE: u64 = 32;
/// The first bit that is unknown as a feature flag. Every known flag above is
/// strictly smaller than this value.
pub const FLAG_UNKNOWN: u64 = 64;
27
28/// Set the reader and writer feature flags in the manifest based on the contents of the manifest.
29pub fn apply_feature_flags(
30    manifest: &mut Manifest,
31    enable_stable_row_id: bool,
32    disable_transaction_file: bool,
33) -> Result<()> {
34    // Reset flags
35    manifest.reader_feature_flags = 0;
36    manifest.writer_feature_flags = 0;
37
38    let has_deletion_files = manifest
39        .fragments
40        .iter()
41        .any(|frag| frag.deletion_file.is_some());
42    if has_deletion_files {
43        // Both readers and writers need to be able to read deletion files
44        manifest.reader_feature_flags |= FLAG_DELETION_FILES;
45        manifest.writer_feature_flags |= FLAG_DELETION_FILES;
46    }
47
48    // If any fragment has row ids, they must all have row ids.
49    let has_row_ids = manifest
50        .fragments
51        .iter()
52        .any(|frag| frag.row_id_meta.is_some());
53    if has_row_ids || enable_stable_row_id {
54        if !manifest
55            .fragments
56            .iter()
57            .all(|frag| frag.row_id_meta.is_some())
58        {
59            return Err(Error::invalid_input(
60                "All fragments must have row ids",
61                location!(),
62            ));
63        }
64        manifest.reader_feature_flags |= FLAG_STABLE_ROW_IDS;
65        manifest.writer_feature_flags |= FLAG_STABLE_ROW_IDS;
66    }
67
68    // Test whether any table metadata has been set
69    if !manifest.config.is_empty() {
70        manifest.writer_feature_flags |= FLAG_TABLE_CONFIG;
71    }
72
73    // Check if this dataset uses multiple base paths (for shallow clones or multi-base datasets)
74    if !manifest.base_paths.is_empty() {
75        manifest.reader_feature_flags |= FLAG_BASE_PATHS;
76        manifest.writer_feature_flags |= FLAG_BASE_PATHS;
77    }
78
79    if disable_transaction_file {
80        manifest.writer_feature_flags |= FLAG_DISABLE_TRANSACTION_FILE;
81    }
82    Ok(())
83}
84
85pub fn can_read_dataset(reader_flags: u64) -> bool {
86    reader_flags < FLAG_UNKNOWN
87}
88
89pub fn can_write_dataset(writer_flags: u64) -> bool {
90    writer_flags < FLAG_UNKNOWN
91}
92
93pub fn has_deprecated_v2_feature_flag(writer_flags: u64) -> bool {
94    writer_flags & FLAG_USE_V2_FORMAT_DEPRECATED != 0
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use crate::format::BasePath;

    /// Every known flag bit, used to build "all known flags" combinations.
    const ALL_KNOWN_FLAGS: u64 = FLAG_DELETION_FILES
        | FLAG_STABLE_ROW_IDS
        | FLAG_USE_V2_FORMAT_DEPRECATED
        | FLAG_TABLE_CONFIG
        | FLAG_BASE_PATHS
        | FLAG_DISABLE_TRANSACTION_FILE;

    #[test]
    fn test_read_check() {
        assert!(can_read_dataset(0));
        assert!(can_read_dataset(FLAG_DELETION_FILES));
        assert!(can_read_dataset(FLAG_STABLE_ROW_IDS));
        assert!(can_read_dataset(FLAG_USE_V2_FORMAT_DEPRECATED));
        assert!(can_read_dataset(FLAG_TABLE_CONFIG));
        assert!(can_read_dataset(FLAG_BASE_PATHS));
        assert!(can_read_dataset(FLAG_DISABLE_TRANSACTION_FILE));
        assert!(can_read_dataset(
            FLAG_DELETION_FILES | FLAG_STABLE_ROW_IDS | FLAG_USE_V2_FORMAT_DEPRECATED
        ));
        assert!(can_read_dataset(ALL_KNOWN_FLAGS));
        assert!(!can_read_dataset(FLAG_UNKNOWN));
        // An unknown bit must be rejected even when mixed with known bits.
        assert!(!can_read_dataset(FLAG_UNKNOWN | FLAG_DELETION_FILES));
    }

    #[test]
    fn test_write_check() {
        assert!(can_write_dataset(0));
        assert!(can_write_dataset(FLAG_DELETION_FILES));
        assert!(can_write_dataset(FLAG_STABLE_ROW_IDS));
        assert!(can_write_dataset(FLAG_USE_V2_FORMAT_DEPRECATED));
        assert!(can_write_dataset(FLAG_TABLE_CONFIG));
        assert!(can_write_dataset(FLAG_BASE_PATHS));
        assert!(can_write_dataset(FLAG_DISABLE_TRANSACTION_FILE));
        assert!(can_write_dataset(
            FLAG_DELETION_FILES
                | FLAG_STABLE_ROW_IDS
                | FLAG_USE_V2_FORMAT_DEPRECATED
                | FLAG_TABLE_CONFIG
                | FLAG_BASE_PATHS
        ));
        assert!(can_write_dataset(ALL_KNOWN_FLAGS));
        assert!(!can_write_dataset(FLAG_UNKNOWN));
        // An unknown bit must be rejected even when mixed with known bits.
        assert!(!can_write_dataset(FLAG_UNKNOWN | FLAG_TABLE_CONFIG));
    }

    #[test]
    fn test_base_paths_feature_flags() {
        use crate::format::{DataStorageFormat, Manifest};
        use arrow_schema::{Field as ArrowField, Schema as ArrowSchema};
        use lance_core::datatypes::Schema;
        use std::collections::HashMap;
        use std::sync::Arc;

        // Create a basic schema for testing
        let arrow_schema = ArrowSchema::new(vec![ArrowField::new(
            "test_field",
            arrow_schema::DataType::Int64,
            false,
        )]);
        let schema = Schema::try_from(&arrow_schema).unwrap();

        // Test 1: Normal dataset (no base_paths) should not have FLAG_BASE_PATHS
        let mut normal_manifest = Manifest::new(
            schema.clone(),
            Arc::new(vec![]),
            DataStorageFormat::default(),
            HashMap::new(), // Empty base_paths
        );
        apply_feature_flags(&mut normal_manifest, false, false).unwrap();
        assert_eq!(normal_manifest.reader_feature_flags & FLAG_BASE_PATHS, 0);
        assert_eq!(normal_manifest.writer_feature_flags & FLAG_BASE_PATHS, 0);

        // Test 2: Dataset with base_paths (shallow clone or multi-base) should have FLAG_BASE_PATHS
        let mut base_paths: HashMap<u32, BasePath> = HashMap::new();
        base_paths.insert(
            1,
            BasePath::new(
                1,
                "file:///path/to/original".to_string(),
                Some("test_ref".to_string()),
                true,
            ),
        );
        let mut multi_base_manifest = Manifest::new(
            schema,
            Arc::new(vec![]),
            DataStorageFormat::default(),
            base_paths,
        );
        apply_feature_flags(&mut multi_base_manifest, false, false).unwrap();
        assert_ne!(
            multi_base_manifest.reader_feature_flags & FLAG_BASE_PATHS,
            0
        );
        assert_ne!(
            multi_base_manifest.writer_feature_flags & FLAG_BASE_PATHS,
            0
        );
    }
}