// lance_table/feature_flags.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Feature flags recorded in a dataset manifest.

6use crate::format::Manifest;
7use lance_core::{Error, Result};
8
// Each flag occupies a single bit so that several flags can be OR-ed together
// into one `u64`.

/// Fragments may contain deletion files, which record the tombstones of
/// soft-deleted rows.
pub const FLAG_DELETION_FILES: u64 = 1;
/// Row ids are stable for both moves and updates. Fragments contain an index
/// mapping row ids to row addresses.
pub const FLAG_STABLE_ROW_IDS: u64 = 2;
/// Files are written with the new v2 format (this flag is no longer used).
pub const FLAG_USE_V2_FORMAT_DEPRECATED: u64 = 4;
/// Table config is present.
pub const FLAG_TABLE_CONFIG: u64 = 8;
/// Dataset uses multiple base paths (for shallow clones or multi-base datasets).
pub const FLAG_BASE_PATHS: u64 = 16;
/// Disable writing the transaction file under `_transaction/`. This flag is
/// set when the transaction should only be written inline in the manifest.
pub const FLAG_DISABLE_TRANSACTION_FILE: u64 = 32;
/// The first bit that is unknown as a feature flag.
///
/// Every known flag is strictly below this value, so a flag set is made up of
/// known flags only when its numeric value is less than `FLAG_UNKNOWN`.
pub const FLAG_UNKNOWN: u64 = 64;
25
26/// Set the reader and writer feature flags in the manifest based on the contents of the manifest.
27pub fn apply_feature_flags(
28    manifest: &mut Manifest,
29    enable_stable_row_id: bool,
30    disable_transaction_file: bool,
31) -> Result<()> {
32    // Reset flags
33    manifest.reader_feature_flags = 0;
34    manifest.writer_feature_flags = 0;
35
36    let has_deletion_files = manifest
37        .fragments
38        .iter()
39        .any(|frag| frag.deletion_file.is_some());
40    if has_deletion_files {
41        // Both readers and writers need to be able to read deletion files
42        manifest.reader_feature_flags |= FLAG_DELETION_FILES;
43        manifest.writer_feature_flags |= FLAG_DELETION_FILES;
44    }
45
46    // If any fragment has row ids, they must all have row ids.
47    let has_row_ids = manifest
48        .fragments
49        .iter()
50        .any(|frag| frag.row_id_meta.is_some());
51    if has_row_ids || enable_stable_row_id {
52        if !manifest
53            .fragments
54            .iter()
55            .all(|frag| frag.row_id_meta.is_some())
56        {
57            return Err(Error::invalid_input("All fragments must have row ids"));
58        }
59        manifest.reader_feature_flags |= FLAG_STABLE_ROW_IDS;
60        manifest.writer_feature_flags |= FLAG_STABLE_ROW_IDS;
61    }
62
63    // Test whether any table metadata has been set
64    if !manifest.config.is_empty() {
65        manifest.writer_feature_flags |= FLAG_TABLE_CONFIG;
66    }
67
68    // Check if this dataset uses multiple base paths (for shallow clones or multi-base datasets)
69    if !manifest.base_paths.is_empty() {
70        manifest.reader_feature_flags |= FLAG_BASE_PATHS;
71        manifest.writer_feature_flags |= FLAG_BASE_PATHS;
72    }
73
74    if disable_transaction_file {
75        manifest.writer_feature_flags |= FLAG_DISABLE_TRANSACTION_FILE;
76    }
77    Ok(())
78}
79
80pub fn can_read_dataset(reader_flags: u64) -> bool {
81    reader_flags < FLAG_UNKNOWN
82}
83
84pub fn can_write_dataset(writer_flags: u64) -> bool {
85    writer_flags < FLAG_UNKNOWN
86}
87
88pub fn has_deprecated_v2_feature_flag(writer_flags: u64) -> bool {
89    writer_flags & FLAG_USE_V2_FORMAT_DEPRECATED != 0
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::format::{BasePath, DataStorageFormat, Manifest};
    use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
    use lance_core::datatypes::Schema;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Every feature flag this module defines, excluding `FLAG_UNKNOWN`.
    const KNOWN_FLAGS: [u64; 6] = [
        FLAG_DELETION_FILES,
        FLAG_STABLE_ROW_IDS,
        FLAG_USE_V2_FORMAT_DEPRECATED,
        FLAG_TABLE_CONFIG,
        FLAG_BASE_PATHS,
        FLAG_DISABLE_TRANSACTION_FILE,
    ];

    /// All known flags OR-ed together.
    fn all_known_flags() -> u64 {
        KNOWN_FLAGS.iter().fold(0, |acc, &flag| acc | flag)
    }

    /// Builds a manifest with a single-column schema, no fragments, and the
    /// given base paths.
    fn empty_manifest(base_paths: HashMap<u32, BasePath>) -> Manifest {
        let arrow_schema = ArrowSchema::new(vec![ArrowField::new(
            "test_field",
            DataType::Int64,
            false,
        )]);
        let schema = Schema::try_from(&arrow_schema).unwrap();
        Manifest::new(
            schema,
            Arc::new(vec![]),
            DataStorageFormat::default(),
            base_paths,
        )
    }

    #[test]
    fn test_read_check() {
        assert!(can_read_dataset(0));
        for &flag in &KNOWN_FLAGS {
            assert!(can_read_dataset(flag), "flag {} must be readable", flag);
        }
        assert!(can_read_dataset(
            FLAG_DELETION_FILES | FLAG_STABLE_ROW_IDS | FLAG_USE_V2_FORMAT_DEPRECATED
        ));
        assert!(can_read_dataset(all_known_flags()));

        // Any bit at or above FLAG_UNKNOWN must be rejected, even when it is
        // combined with known flags.
        assert!(!can_read_dataset(FLAG_UNKNOWN));
        assert!(!can_read_dataset(FLAG_UNKNOWN | FLAG_DELETION_FILES));
        assert!(!can_read_dataset(u64::MAX));
    }

    #[test]
    fn test_write_check() {
        assert!(can_write_dataset(0));
        for &flag in &KNOWN_FLAGS {
            assert!(can_write_dataset(flag), "flag {} must be writable", flag);
        }
        assert!(can_write_dataset(
            FLAG_DELETION_FILES
                | FLAG_STABLE_ROW_IDS
                | FLAG_USE_V2_FORMAT_DEPRECATED
                | FLAG_TABLE_CONFIG
                | FLAG_BASE_PATHS
        ));
        assert!(can_write_dataset(all_known_flags()));

        assert!(!can_write_dataset(FLAG_UNKNOWN));
        assert!(!can_write_dataset(FLAG_UNKNOWN | FLAG_TABLE_CONFIG));
        assert!(!can_write_dataset(u64::MAX));
    }

    #[test]
    fn test_deprecated_v2_flag_check() {
        assert!(!has_deprecated_v2_feature_flag(0));
        assert!(has_deprecated_v2_feature_flag(FLAG_USE_V2_FORMAT_DEPRECATED));
        assert!(has_deprecated_v2_feature_flag(all_known_flags()));
        assert!(!has_deprecated_v2_feature_flag(
            all_known_flags() & !FLAG_USE_V2_FORMAT_DEPRECATED
        ));
    }

    #[test]
    fn test_base_paths_feature_flags() {
        // Test 1: Normal dataset (no base_paths) should not have FLAG_BASE_PATHS
        let mut normal_manifest = empty_manifest(HashMap::new());
        apply_feature_flags(&mut normal_manifest, false, false).unwrap();
        assert_eq!(normal_manifest.reader_feature_flags & FLAG_BASE_PATHS, 0);
        assert_eq!(normal_manifest.writer_feature_flags & FLAG_BASE_PATHS, 0);

        // Test 2: Dataset with base_paths (shallow clone or multi-base) should have FLAG_BASE_PATHS
        let mut base_paths: HashMap<u32, BasePath> = HashMap::new();
        base_paths.insert(
            1,
            BasePath::new(
                1,
                "file:///path/to/original".to_string(),
                Some("test_ref".to_string()),
                true,
            ),
        );
        let mut multi_base_manifest = empty_manifest(base_paths);
        apply_feature_flags(&mut multi_base_manifest, false, false).unwrap();
        assert_ne!(
            multi_base_manifest.reader_feature_flags & FLAG_BASE_PATHS,
            0
        );
        assert_ne!(
            multi_base_manifest.writer_feature_flags & FLAG_BASE_PATHS,
            0
        );
    }

    #[test]
    fn test_flags_from_arguments() {
        // Stable row ids can be requested for a manifest without fragments.
        let mut stable = empty_manifest(HashMap::new());
        apply_feature_flags(&mut stable, true, false).unwrap();
        assert_ne!(stable.reader_feature_flags & FLAG_STABLE_ROW_IDS, 0);
        assert_ne!(stable.writer_feature_flags & FLAG_STABLE_ROW_IDS, 0);

        // Disabling the transaction file only concerns writers.
        let mut inline_txn = empty_manifest(HashMap::new());
        apply_feature_flags(&mut inline_txn, false, true).unwrap();
        assert_eq!(
            inline_txn.reader_feature_flags & FLAG_DISABLE_TRANSACTION_FILE,
            0
        );
        assert_ne!(
            inline_txn.writer_feature_flags & FLAG_DISABLE_TRANSACTION_FILE,
            0
        );
    }

    #[test]
    fn test_stale_flags_are_replaced() {
        let mut manifest = empty_manifest(HashMap::new());
        manifest.reader_feature_flags = FLAG_UNKNOWN | FLAG_DELETION_FILES;
        manifest.writer_feature_flags = FLAG_UNKNOWN | FLAG_DELETION_FILES;
        apply_feature_flags(&mut manifest, false, false).unwrap();

        // No fragment has a deletion file and the unknown bit is never set by
        // `apply_feature_flags`, so both stale bits must be gone.
        let stale = FLAG_UNKNOWN | FLAG_DELETION_FILES;
        assert_eq!(manifest.reader_feature_flags & stale, 0);
        assert_eq!(manifest.writer_feature_flags & stale, 0);
    }
}