lance_table/
feature_flags.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Feature flags
5
6use snafu::location;
7
8use crate::format::Manifest;
9use lance_core::{Error, Result};
10
/// Fragments may contain deletion files, which record the tombstones of
/// soft-deleted rows.
///
/// Set on both reader and writer flags by [`apply_feature_flags`] when any
/// fragment carries a deletion file.
pub const FLAG_DELETION_FILES: u64 = 0x01;
/// Row ids are stable for both moves and updates. Fragments contain an index
/// mapping row ids to row addresses.
///
/// Set on both reader and writer flags by [`apply_feature_flags`].
pub const FLAG_STABLE_ROW_IDS: u64 = 0x02;
/// Files are written with the new v2 format (this flag is no longer used).
///
/// The bit stays reserved; [`has_deprecated_v2_feature_flag`] tests for it.
pub const FLAG_USE_V2_FORMAT_DEPRECATED: u64 = 0x04;
/// Table config is present.
///
/// [`apply_feature_flags`] sets this on the writer flags only.
pub const FLAG_TABLE_CONFIG: u64 = 0x08;
/// Dataset is a shallow clone with external base paths.
///
/// Set on both reader and writer flags by [`apply_feature_flags`] when the
/// manifest has at least one base path.
pub const FLAG_SHALLOW_CLONE: u64 = 0x10;
/// The first bit that is unknown as a feature flag.
///
/// [`can_read_dataset`] and [`can_write_dataset`] reject any flag value that
/// is greater than or equal to this constant.
pub const FLAG_UNKNOWN: u64 = 0x20;
25
26/// Set the reader and writer feature flags in the manifest based on the contents of the manifest.
27pub fn apply_feature_flags(manifest: &mut Manifest, enable_stable_row_id: bool) -> Result<()> {
28    // Reset flags
29    manifest.reader_feature_flags = 0;
30    manifest.writer_feature_flags = 0;
31
32    let has_deletion_files = manifest
33        .fragments
34        .iter()
35        .any(|frag| frag.deletion_file.is_some());
36    if has_deletion_files {
37        // Both readers and writers need to be able to read deletion files
38        manifest.reader_feature_flags |= FLAG_DELETION_FILES;
39        manifest.writer_feature_flags |= FLAG_DELETION_FILES;
40    }
41
42    // If any fragment has row ids, they must all have row ids.
43    let has_row_ids = manifest
44        .fragments
45        .iter()
46        .any(|frag| frag.row_id_meta.is_some());
47    if has_row_ids || enable_stable_row_id {
48        if !manifest
49            .fragments
50            .iter()
51            .all(|frag| frag.row_id_meta.is_some())
52        {
53            return Err(Error::invalid_input(
54                "All fragments must have row ids",
55                location!(),
56            ));
57        }
58        manifest.reader_feature_flags |= FLAG_STABLE_ROW_IDS;
59        manifest.writer_feature_flags |= FLAG_STABLE_ROW_IDS;
60    }
61
62    // Test whether any table metadata has been set
63    if !manifest.config.is_empty() {
64        manifest.writer_feature_flags |= FLAG_TABLE_CONFIG;
65    }
66
67    // Check if this is a shallow clone dataset by examining base_paths
68    if !manifest.base_paths.is_empty() {
69        manifest.reader_feature_flags |= FLAG_SHALLOW_CLONE;
70        manifest.writer_feature_flags |= FLAG_SHALLOW_CLONE;
71    }
72
73    Ok(())
74}
75
76pub fn can_read_dataset(reader_flags: u64) -> bool {
77    reader_flags < FLAG_UNKNOWN
78}
79
80pub fn can_write_dataset(writer_flags: u64) -> bool {
81    writer_flags < FLAG_UNKNOWN
82}
83
84pub fn has_deprecated_v2_feature_flag(writer_flags: u64) -> bool {
85    writer_flags & FLAG_USE_V2_FORMAT_DEPRECATED != 0
86}
87
#[cfg(test)]
mod tests {
    use super::*;
    use crate::format::BasePath;

    /// Every feature flag this version knows about, OR-ed together.
    const ALL_KNOWN_FLAGS: u64 = FLAG_DELETION_FILES
        | FLAG_STABLE_ROW_IDS
        | FLAG_USE_V2_FORMAT_DEPRECATED
        | FLAG_TABLE_CONFIG
        | FLAG_SHALLOW_CLONE;

    #[test]
    fn test_read_check() {
        // Each known flag on its own is readable.
        assert!(can_read_dataset(0));
        assert!(can_read_dataset(FLAG_DELETION_FILES));
        assert!(can_read_dataset(FLAG_STABLE_ROW_IDS));
        assert!(can_read_dataset(FLAG_USE_V2_FORMAT_DEPRECATED));
        assert!(can_read_dataset(FLAG_TABLE_CONFIG));
        assert!(can_read_dataset(FLAG_SHALLOW_CLONE));
        // So is the combination of all of them.
        assert!(can_read_dataset(ALL_KNOWN_FLAGS));
        // Any unknown bit makes the dataset unreadable, even alongside known bits.
        assert!(!can_read_dataset(FLAG_UNKNOWN));
        assert!(!can_read_dataset(FLAG_UNKNOWN | FLAG_DELETION_FILES));
        assert!(!can_read_dataset(u64::MAX));
    }

    #[test]
    fn test_write_check() {
        // Each known flag on its own is writable.
        assert!(can_write_dataset(0));
        assert!(can_write_dataset(FLAG_DELETION_FILES));
        assert!(can_write_dataset(FLAG_STABLE_ROW_IDS));
        assert!(can_write_dataset(FLAG_USE_V2_FORMAT_DEPRECATED));
        assert!(can_write_dataset(FLAG_TABLE_CONFIG));
        assert!(can_write_dataset(FLAG_SHALLOW_CLONE));
        // So is the combination of all of them.
        assert!(can_write_dataset(ALL_KNOWN_FLAGS));
        // Any unknown bit makes the dataset unwritable, even alongside known bits.
        assert!(!can_write_dataset(FLAG_UNKNOWN));
        assert!(!can_write_dataset(FLAG_UNKNOWN | FLAG_DELETION_FILES));
        assert!(!can_write_dataset(u64::MAX));
    }

    #[test]
    fn test_deprecated_v2_flag_check() {
        assert!(!has_deprecated_v2_feature_flag(0));
        assert!(has_deprecated_v2_feature_flag(
            FLAG_USE_V2_FORMAT_DEPRECATED
        ));
        // Other bits must not hide the deprecated bit.
        assert!(has_deprecated_v2_feature_flag(ALL_KNOWN_FLAGS));
        // Nor may they be mistaken for it.
        assert!(!has_deprecated_v2_feature_flag(
            ALL_KNOWN_FLAGS & !FLAG_USE_V2_FORMAT_DEPRECATED
        ));
    }

    #[test]
    fn test_shallow_clone_feature_flags() {
        use crate::format::{DataStorageFormat, Manifest};
        use arrow_schema::{Field as ArrowField, Schema as ArrowSchema};
        use lance_core::datatypes::Schema;
        use std::collections::HashMap;
        use std::sync::Arc;

        // Create a basic schema for testing
        let arrow_schema = ArrowSchema::new(vec![ArrowField::new(
            "test_field",
            arrow_schema::DataType::Int64,
            false,
        )]);
        let schema = Schema::try_from(&arrow_schema).unwrap();

        // Test 1: Normal dataset (no base_paths) should not have FLAG_SHALLOW_CLONE
        let mut normal_manifest = Manifest::new(
            schema.clone(),
            Arc::new(vec![]),
            DataStorageFormat::default(),
            None,
            HashMap::new(), // Empty base_paths
        );
        apply_feature_flags(&mut normal_manifest, false).unwrap();
        assert_eq!(normal_manifest.reader_feature_flags & FLAG_SHALLOW_CLONE, 0);
        assert_eq!(normal_manifest.writer_feature_flags & FLAG_SHALLOW_CLONE, 0);

        // Test 2: Cloned dataset (with base_paths) should have FLAG_SHALLOW_CLONE
        let mut base_paths: HashMap<u32, BasePath> = HashMap::new();
        base_paths.insert(
            1,
            BasePath {
                id: 1,
                name: Some("test_ref".to_string()),
                is_dataset_root: true,
                path: "/path/to/original".to_string(),
            },
        );
        let mut cloned_manifest = Manifest::new(
            schema,
            Arc::new(vec![]),
            DataStorageFormat::default(),
            None,
            base_paths,
        );
        apply_feature_flags(&mut cloned_manifest, false).unwrap();
        assert_ne!(cloned_manifest.reader_feature_flags & FLAG_SHALLOW_CLONE, 0);
        assert_ne!(cloned_manifest.writer_feature_flags & FLAG_SHALLOW_CLONE, 0);
    }
}