Skip to main content

cqlite_core/storage/sstable/
version_gate.rs

1//! SSTable version-letter gates for Cassandra BIG and BTI formats.
2//!
3//! This module implements the per-letter feature-gate logic that mirrors
4//! `BigFormat.java` and `BtiFormat.java` from Cassandra 5.0.8.  Each gate
5//! is a `bool` field derived **only** from the two-letter version string found
6//! in the SSTable filename prefix (e.g. `nb`, `oa`, `da`).
7//!
8//! ## Authority chain
9//!
10//! Cassandra 5.0.8 source (primary) > audit report B10 Part 2 > guide ch.22
11//!
12//! ### BIG format version letters (BigFormat.java:341-526)
13//!
14//! | Letter | Cassandra release | Notable additions |
15//! |--------|-------------------|--------------------|
16//! | `ma`   | 3.0.0             | Native row storage, BF hash swap |
17//! | `mb`   | 3.0.7 / 3.7       | Commit-log lower bound |
18//! | `mc`   | 3.0.8 / 3.9       | Commit-log intervals |
19//! | `md`   | 3.0.18 / 3.11.4   | Accurate min/max clustering |
20//! | `me`   | 3.0.25 / 3.11.11  | Originating host ID (first appearance) |
21//! | `na`   | 4.0-rc1           | Uncompressed chunks, pending repair, metadata checksum |
22//! | `nb`   | 4.0-rc2           | Default BIG letter for stock Cassandra 5.0 compat mode |
23//! | `oa`   | 5.0               | Improved min/max, uint deletion time, key range, token coverage |
24//!
25//! ### BTI format version letters (BtiFormat.java:287-420)
26//!
27//! | Letter | Cassandra release | Notes |
28//! |--------|------------------|-------|
29//! | `da`   | 5.0              | Only BTI letter; all gates TRUE |
30//!
31//! ## Storage-compatibility-mode note
32//!
33//! Stock Cassandra 5.0 writes **`nb`-versioned BIG** SSTables when
34//! `storage_compatibility_mode` is `CASSANDRA_4` (the default).  `oa` is
35//! only written after explicitly raising the mode to `NONE`.
36//!
37//! ## SSTable ID forms (Descriptor.java:85, 95)
38//!
39//! Cassandra 5.0 supports **two** SSTable ID forms:
40//! - Sequential: `nb-1-big-Data.db`  (integer id)
41//! - UUID-based: `nb-6aa08200a25111f0a3fef1a551383fb9-big-Data.db` (hex string)
42//!
43//! Both forms are generated by real Cassandra 5.0 clusters; the UUID form is
44//! the default since 5.0.0 (`uuid_sstable_identifiers_enabled: true`).
45
46use std::path::Path;
47
48use crate::{Error, Result};
49
/// On-disk SSTable format family.
///
/// Corresponds to the `<format>` segment of the Cassandra filename pattern
/// `<version>-<id>-<format>-<component>.db`, which is either `big` or `bti`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SsTableFormat {
    /// "big" – the classic BIG format (Cassandra 3.0 – 5.0).
    Big,
    /// "bti" – the trie-based BTI format (Cassandra 5.0+).
    Bti,
}

impl SsTableFormat {
    /// Look up the format whose filename spelling is exactly `s`.
    ///
    /// Returns `None` for anything other than `"big"` or `"bti"`; the
    /// comparison is case-sensitive.
    pub fn parse(s: &str) -> Option<Self> {
        // Route the lookup through `as_str` so the spelling of each format
        // is defined in exactly one place.
        [Self::Big, Self::Bti]
            .iter()
            .copied()
            .find(|format| format.as_str() == s)
    }

    /// Canonical lowercase spelling of this format as it appears in filenames.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Big => "big",
            Self::Bti => "bti",
        }
    }
}
80
/// Parsed Cassandra SSTable descriptor extracted from a filename.
///
/// Filename pattern (Descriptor.java:251):
/// ```text
/// <version>-<id>-<format>-<component>.db
/// ```
///
/// Both sequential integer IDs (`1`, `2`, …) and UUID-ish hex string IDs
/// (`6aa08200a25111f0a3fef1a551383fb9`) are accepted.  Values are produced by
/// `SsTableDescriptor::parse` and `SsTableDescriptor::parse_filename`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SsTableDescriptor {
    /// Two-letter version string, e.g. `"nb"`, `"oa"`, `"da"`.
    pub version: String,
    /// Raw SSTable id as found in the filename (integer string or hex UUID).
    /// When the id itself contains dashes, this is the dash-joined text
    /// between the version segment and the format segment.
    pub sstable_id: String,
    /// Format family (`big` or `bti`).
    pub format: SsTableFormat,
    /// Component name following the format segment, including its file
    /// extension, e.g. `"Data.db"` or `"TOC.txt"`.
    pub component: String,
}
101
102impl SsTableDescriptor {
103    /// Parse a Cassandra SSTable descriptor from a filename or file path.
104    ///
105    /// Accepts both:
106    /// - `nb-1-big-Data.db`               (sequential integer id)
107    /// - `nb-6aa08200a25111f0a3fef1a551383fb9-big-Data.db`  (UUID hex id)
108    /// - `oa-00000000-0000-0000-0000-000000000001-big-Data.db` (hyphenated UUID)
109    ///
110    /// Returns an error if the filename does not contain at least four
111    /// dash-separated segments or if the format segment is not `big` or `bti`.
112    pub fn parse(path: &Path) -> Result<Self> {
113        let filename = path
114            .file_name()
115            .and_then(|f| f.to_str())
116            .ok_or_else(|| Error::InvalidPath(format!("Invalid SSTable path: {:?}", path)))?;
117
118        Self::parse_filename(filename)
119    }
120
121    /// Parse from a bare filename string (no directory component required).
122    pub fn parse_filename(filename: &str) -> Result<Self> {
123        // Strip the `.db` extension if present so we can reason about the parts.
124        let base = if let Some(b) = filename.strip_suffix(".db") {
125            b
126        } else if let Some(b) = filename.strip_suffix(".txt") {
127            // TOC.txt – strip .txt instead
128            b
129        } else {
130            filename
131        };
132
133        // Split on `-`.  The component itself may contain `-` (e.g. `TOC`
134        // doesn't, but `CompressionInfo` doesn't either – however, future
135        // components could).  We therefore split from the left and treat
136        // everything from part[3] onwards as the component.
137        //
138        // Pattern: <version>-<id>-<format>-<component>
139        //   parts[0] = version  (always 2 lowercase letters: [a-z]{2})
140        //   parts[1..n-2] = id  (one or more dash-joined segments)
141        //   parts[n-1] = format ("big" or "bti")
142        //   parts[n] = component (rest of original, including original `.db` suffix)
143        //
144        // We search for the format segment by scanning right-to-left after
145        // the first part for "big" or "bti", which avoids being tripped up
146        // by dash-separated UUID ids.
147
148        let parts: Vec<&str> = base.split('-').collect();
149        if parts.len() < 4 {
150            return Err(Error::InvalidFormat(format!(
151                "SSTable filename has fewer than 4 dash-separated segments: {:?}",
152                filename
153            )));
154        }
155
156        let version = parts[0];
157        // Validate version is exactly 2 lowercase letters.
158        if version.len() != 2 || !version.chars().all(|c| c.is_ascii_lowercase()) {
159            return Err(Error::InvalidFormat(format!(
160                "SSTable version segment must be 2 lowercase letters, got {:?} in {:?}",
161                version, filename
162            )));
163        }
164
165        // Find the format segment by scanning right-to-left (skip the last
166        // component part), starting from parts[2].
167        // Strategy: look for "big" or "bti" starting from the second-to-last
168        // non-component position.  The component name never equals "big" or "bti".
169        let format_idx = parts[2..]
170            .iter()
171            .enumerate()
172            .rev()
173            .find(|(_, p)| **p == "big" || **p == "bti")
174            .map(|(i, _)| i + 2); // offset back to original parts index
175
176        let format_idx = format_idx.ok_or_else(|| {
177            Error::InvalidFormat(format!(
178                "No 'big' or 'bti' format segment found in {:?}",
179                filename
180            ))
181        })?;
182
183        let format = SsTableFormat::parse(parts[format_idx]).ok_or_else(|| {
184            Error::InvalidFormat(format!(
185                "Unknown format {:?} in {:?}",
186                parts[format_idx], filename
187            ))
188        })?;
189
190        // id is everything between version and format
191        let sstable_id = parts[1..format_idx].join("-");
192
193        // component is everything after format, re-joined and with extension restored
194        let component_base = parts[format_idx + 1..].join("-");
195        // Re-attach original extension
196        let extension = if filename.ends_with(".db") {
197            ".db"
198        } else {
199            ".txt"
200        };
201        let component = format!("{}{}", component_base, extension);
202
203        Ok(Self {
204            version: version.to_string(),
205            sstable_id,
206            format,
207            component,
208        })
209    }
210}
211
/// Per-letter feature gates for a BIG-format SSTable.
///
/// Each boolean field corresponds exactly to the gate computed in
/// `BigFormat.BigVersion` (Cassandra 5.0.8, lines 395-410).
///
/// Gates are derived solely from the **two-letter version string**; they do
/// not depend on file content.  Use `BigVersionGates::from_version` to compute
/// them for an arbitrary letter pair.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BigVersionGates {
    /// Raw version string this gate set was computed from.
    pub version: String,

    // ---- Gates in order as they appear in BigFormat.java ----
    /// `hasCommitLogLowerBound` — version >= `mb`
    /// (BigFormat.java:395)
    pub has_commit_log_lower_bound: bool,

    /// `hasCommitLogIntervals` — version >= `mc`
    /// (BigFormat.java:396)
    pub has_commit_log_intervals: bool,

    /// `hasAccurateMinMax` — matches `m[d-z]` or `n[a-z]`; **deprecated in `oa`**
    /// (so it is `false` for `oa`).
    /// (BigFormat.java:397)
    pub has_accurate_min_max: bool,

    /// `hasLegacyMinMax` — matches `m[a-z]` or `n[a-z]`; **deprecated in `oa`**
    /// (so it is `false` for `oa`).
    /// (BigFormat.java:398)
    pub has_legacy_min_max: bool,

    /// `hasOriginatingHostId` — version >= `nb` **OR** matches `m[e-z]`
    ///
    /// This is the straddle gate: it fires for the `me`–`mz` block of the `m`
    /// series AND for all versions >= `nb` in the `n`/`o` series.  Note that
    /// `na` satisfies neither condition, so the gate is `false` for `na`.
    /// (BigFormat.java:400)
    pub has_originating_host_id: bool,

    /// `hasMaxCompressedLength` — version >= `na`
    /// (BigFormat.java:401)
    pub has_max_compressed_length: bool,

    /// `hasPendingRepair` — version >= `na`
    /// (BigFormat.java:402)
    pub has_pending_repair: bool,

    /// `hasIsTransient` — version >= `na`
    /// (BigFormat.java:403)
    pub has_is_transient: bool,

    /// `hasMetadataChecksum` — version >= `na`
    /// (BigFormat.java:404)
    pub has_metadata_checksum: bool,

    /// `hasOldBfFormat` — version < `na`  (old bloom-filter format)
    /// (BigFormat.java:405)
    pub has_old_bf_format: bool,

    /// `hasImprovedMinMax` — version >= `oa`  (not set for `nb` or earlier)
    /// (BigFormat.java:406)
    pub has_improved_min_max: bool,

    /// `hasPartitionLevelDeletionPresenceMarker` — version >= `oa`
    /// (not set for `nb` or earlier)
    /// (BigFormat.java:407)
    pub has_partition_level_deletion_presence_marker: bool,

    /// `hasKeyRange` — version >= `oa`  (not set for `nb` or earlier)
    /// (BigFormat.java:408)
    pub has_key_range: bool,

    /// `hasUIntDeletionTime` — version >= `oa`  (not set for `nb` or earlier;
    /// 2106-safe TTL)
    /// (BigFormat.java:409)
    pub has_uint_deletion_time: bool,

    /// `hasTokenSpaceCoverage` — version >= `oa`  (not set for `nb` or earlier)
    /// (BigFormat.java:410)
    pub has_token_space_coverage: bool,
}
288
289impl BigVersionGates {
290    /// Compute all gates for the given two-letter BIG-format version string.
291    ///
292    /// The version comparison uses lexicographic ordering of the raw string,
293    /// which is correct because Cassandra uses single-character prefix letters
294    /// (`m`, `n`, `o`) followed by a single lowercase suffix.  The Cassandra
295    /// source code does the same (`version.compareTo("oa") >= 0`).
296    ///
297    /// # Errors
298    ///
299    /// Returns `Err` if `version` is not exactly two ASCII lowercase letters.
300    pub fn from_version(version: &str) -> Result<Self> {
301        if version.len() != 2 || !version.chars().all(|c| c.is_ascii_lowercase()) {
302            return Err(Error::InvalidFormat(format!(
303                "BIG version must be 2 lowercase letters, got {:?}",
304                version
305            )));
306        }
307
308        let v = version;
309
310        // `version.matches("(m[d-z])|(n[a-z])")` from BigFormat.java line 397.
311        let has_accurate_min_max = {
312            let first = v.chars().next().unwrap();
313            let second = v.chars().nth(1).unwrap();
314            (first == 'm' && ('d'..='z').contains(&second))
315                || (first == 'n' && second.is_ascii_lowercase())
316        };
317
318        // `version.matches("(m[a-z])|(n[a-z])")` from BigFormat.java line 398.
319        let has_legacy_min_max = {
320            let first = v.chars().next().unwrap();
321            let second = v.chars().nth(1).unwrap();
322            (first == 'm' && second.is_ascii_lowercase())
323                || (first == 'n' && second.is_ascii_lowercase())
324        };
325
326        // `version.compareTo("nb") >= 0 || version.matches("(m[e-z])")` (line 400).
327        let has_originating_host_id = {
328            let first = v.chars().next().unwrap();
329            let second = v.chars().nth(1).unwrap();
330            v >= "nb" || (first == 'm' && ('e'..='z').contains(&second))
331        };
332
333        Ok(Self {
334            version: version.to_string(),
335            has_commit_log_lower_bound: v >= "mb",
336            has_commit_log_intervals: v >= "mc",
337            has_accurate_min_max,
338            has_legacy_min_max,
339            has_originating_host_id,
340            has_max_compressed_length: v >= "na",
341            has_pending_repair: v >= "na",
342            has_is_transient: v >= "na",
343            has_metadata_checksum: v >= "na",
344            has_old_bf_format: v < "na",
345            // oa-only gates: all false for nb, all true for oa
346            has_improved_min_max: v >= "oa",
347            has_partition_level_deletion_presence_marker: v >= "oa",
348            has_key_range: v >= "oa",
349            has_uint_deletion_time: v >= "oa",
350            has_token_space_coverage: v >= "oa",
351        })
352    }
353
354    /// Returns `true` if this version is compatible for reading according to
355    /// `BigVersion.isCompatible()` (BigFormat.java:516-519).
356    ///
357    /// A version is compatible when:
358    /// - It is >= `ma` (the earliest supported version), **and**
359    /// - Its first letter is <= `o` (the first letter of the current `oa`)
360    pub fn is_compatible(&self) -> bool {
361        let v = self.version.as_str();
362        v >= "ma" && v.chars().next().is_some_and(|c| c <= 'o')
363    }
364
365    /// Returns `true` when this is a stock Cassandra 5.0 default-mode SSTable
366    /// (`nb` version — `storage_compatibility_mode = CASSANDRA_4`).
367    pub fn is_cassandra5_compat_mode(&self) -> bool {
368        self.version == "nb"
369    }
370
371    /// Returns `true` when this is a full Cassandra 5.0 SSTable (`oa` version —
372    /// `storage_compatibility_mode = NONE`).
373    pub fn is_cassandra5_native(&self) -> bool {
374        self.version == "oa"
375    }
376
377    /// Infallible constructor returning gates for the `nb` version (stock Cassandra 5.0
378    /// `storage_compatibility_mode = CASSANDRA_4`).
379    ///
380    /// Use this instead of `from_version("nb").expect(…)` in library code, which
381    /// violates the project's no-`expect` mandate.  The field values are the literal
382    /// results of evaluating `from_version("nb")`; a unit test in this module keeps
383    /// them in sync with `from_version`.
384    ///
385    /// VG3 fall-back: when the SSTable filename cannot be parsed the reader defaults
386    /// to these gates so existing behaviour is preserved.
387    pub fn nb_fallback() -> Self {
388        Self {
389            version: "nb".to_string(),
390            // Gates matching BigFormat.java for version "nb" ----------------
391            has_commit_log_lower_bound: true, // "nb" >= "mb"
392            has_commit_log_intervals: true,   // "nb" >= "mc"
393            has_accurate_min_max: true,       // "nb" in n[a-z]
394            has_legacy_min_max: true,         // "nb" in n[a-z]
395            has_originating_host_id: true,    // "nb" >= "nb"
396            has_max_compressed_length: true,  // "nb" >= "na"
397            has_pending_repair: true,         // "nb" >= "na"
398            has_is_transient: true,           // "nb" >= "na"
399            has_metadata_checksum: true,      // "nb" >= "na"
400            has_old_bf_format: false,         // "nb" NOT < "na"
401            // oa-only gates — all FALSE for nb
402            has_improved_min_max: false,
403            has_partition_level_deletion_presence_marker: false,
404            has_key_range: false,
405            has_uint_deletion_time: false,
406            has_token_space_coverage: false,
407        }
408    }
409}
410
/// Feature gates for a BTI-format SSTable.
///
/// BtiFormat only has one version (`da`).  All modern feature gates are TRUE
/// for `da` (BtiFormat.java:321-418); the only gates that are FALSE are the
/// legacy ones (`has_old_bf_format`, `has_legacy_min_max`).
///
/// Fields mirror the BIG gates for API parity.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BtiVersionGates {
    /// Raw version string (always `"da"` for BTI).
    pub version: String,

    /// `hasCommitLogLowerBound` — TRUE for `da`.
    pub has_commit_log_lower_bound: bool,
    /// `hasCommitLogIntervals` — TRUE for `da`.
    pub has_commit_log_intervals: bool,
    /// `hasMaxCompressedLength` — TRUE for `da`.
    pub has_max_compressed_length: bool,
    /// `hasPendingRepair` — TRUE for `da`.
    pub has_pending_repair: bool,
    /// `hasIsTransient` — TRUE for `da`.
    pub has_is_transient: bool,
    /// `hasMetadataChecksum` — TRUE for `da`.
    pub has_metadata_checksum: bool,
    /// `hasOldBfFormat` is **FALSE** for BTI (BtiFormat.java:357-360).
    pub has_old_bf_format: bool,
    /// `hasOriginatingHostId` — TRUE for `da`.
    pub has_originating_host_id: bool,
    /// `hasAccurateMinMax` — **TRUE** for BTI `da`.
    ///
    /// Source: BtiFormat.java:363-366
    /// ```java
    /// public boolean hasAccurateMinMax() { return true; }
    /// ```
    pub has_accurate_min_max: bool,
    /// `hasLegacyMinMax` — **FALSE** for BTI `da`.
    ///
    /// Source: BtiFormat.java:368-371
    /// ```java
    /// public boolean hasLegacyMinMax() { return false; }
    /// ```
    pub has_legacy_min_max: bool,
    /// `hasImprovedMinMax` — TRUE for `da`.
    pub has_improved_min_max: bool,
    /// `hasTokenSpaceCoverage` — TRUE for `da`.
    pub has_token_space_coverage: bool,
    /// `hasPartitionLevelDeletionPresenceMarker` — TRUE for `da`.
    pub has_partition_level_deletion_presence_marker: bool,
    /// `hasKeyRange` — TRUE for `da`.
    pub has_key_range: bool,
    /// `hasUIntDeletionTime` — TRUE for `da`.
    pub has_uint_deletion_time: bool,
}
450
451impl BtiVersionGates {
452    /// Compute BTI gates for the given version string.
453    ///
454    /// # Errors
455    ///
456    /// Returns `Err` if the version is not `"da"` (the only BTI version).
457    pub fn from_version(version: &str) -> Result<Self> {
458        if version != "da" {
459            return Err(Error::InvalidFormat(format!(
460                "BTI format only supports version 'da', got {:?}",
461                version
462            )));
463        }
464        Ok(Self {
465            version: version.to_string(),
466            has_commit_log_lower_bound: true,
467            has_commit_log_intervals: true,
468            has_max_compressed_length: true,
469            has_pending_repair: true,
470            has_is_transient: true,
471            has_metadata_checksum: true,
472            has_old_bf_format: false, // Always false for BTI (BtiFormat.java:357-360)
473            has_originating_host_id: true,
474            // BtiFormat.java:363-366: `public boolean hasAccurateMinMax() { return true; }`
475            has_accurate_min_max: true,
476            // BtiFormat.java:368-371: `public boolean hasLegacyMinMax() { return false; }`
477            has_legacy_min_max: false,
478            has_improved_min_max: true,
479            has_token_space_coverage: true,
480            has_partition_level_deletion_presence_marker: true,
481            has_key_range: true,
482            has_uint_deletion_time: true,
483        })
484    }
485}
486
/// Combined version-gate result for any SSTable (BIG or BTI).
///
/// The variant is selected by the descriptor's format segment; see
/// `VersionGates::from_descriptor` and `VersionGates::from_path`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VersionGates {
    /// BIG-format gates.
    Big(BigVersionGates),
    /// BTI-format gates.
    Bti(BtiVersionGates),
}
495
496impl VersionGates {
497    /// Compute gates from a parsed `SsTableDescriptor`.
498    pub fn from_descriptor(desc: &SsTableDescriptor) -> Result<Self> {
499        match desc.format {
500            SsTableFormat::Big => BigVersionGates::from_version(&desc.version).map(Self::Big),
501            SsTableFormat::Bti => BtiVersionGates::from_version(&desc.version).map(Self::Bti),
502        }
503    }
504
505    /// Compute gates directly from a file path.
506    pub fn from_path(path: &Path) -> Result<Self> {
507        let desc = SsTableDescriptor::parse(path)?;
508        Self::from_descriptor(&desc)
509    }
510}
511
512#[cfg(test)]
513mod tests {
514    use super::*;
515    use std::path::PathBuf;
516
517    // -----------------------------------------------------------------------
518    // SsTableDescriptor filename parsing
519    // -----------------------------------------------------------------------
520
521    #[test]
522    fn test_descriptor_sequential_id() {
523        let desc = SsTableDescriptor::parse_filename("nb-1-big-Data.db").unwrap();
524        assert_eq!(desc.version, "nb");
525        assert_eq!(desc.sstable_id, "1");
526        assert_eq!(desc.format, SsTableFormat::Big);
527        assert_eq!(desc.component, "Data.db");
528    }
529
530    #[test]
531    fn test_descriptor_uuid_id_no_hyphens() {
532        // UUID form used in the CQLite test corpus: 32-hex-char id with no hyphens
533        let filename = "nb-6aa08200a25111f0a3fef1a551383fb9-big-Data.db";
534        let desc = SsTableDescriptor::parse_filename(filename).unwrap();
535        assert_eq!(desc.version, "nb");
536        assert_eq!(desc.sstable_id, "6aa08200a25111f0a3fef1a551383fb9");
537        assert_eq!(desc.format, SsTableFormat::Big);
538        assert_eq!(desc.component, "Data.db");
539    }
540
541    #[test]
542    fn test_descriptor_oa_version() {
543        let desc = SsTableDescriptor::parse_filename("oa-1-big-Data.db").unwrap();
544        assert_eq!(desc.version, "oa");
545        assert_eq!(desc.format, SsTableFormat::Big);
546    }
547
548    #[test]
549    fn test_descriptor_da_bti_version() {
550        let desc = SsTableDescriptor::parse_filename("da-1-bti-Partitions.db").unwrap();
551        assert_eq!(desc.version, "da");
552        assert_eq!(desc.format, SsTableFormat::Bti);
553        assert_eq!(desc.component, "Partitions.db");
554    }
555
556    #[test]
557    fn test_descriptor_legacy_versions() {
558        for version in &["ma", "mb", "mc", "md", "me", "na"] {
559            let filename = format!("{}-3-big-Data.db", version);
560            let desc = SsTableDescriptor::parse_filename(&filename).unwrap();
561            assert_eq!(desc.version, *version, "version mismatch for {}", filename);
562            assert_eq!(desc.format, SsTableFormat::Big);
563        }
564    }
565
566    #[test]
567    fn test_descriptor_toc_txt() {
568        let desc = SsTableDescriptor::parse_filename("nb-1-big-TOC.txt").unwrap();
569        assert_eq!(desc.version, "nb");
570        assert_eq!(desc.component, "TOC.txt");
571    }
572
573    #[test]
574    fn test_descriptor_compression_info() {
575        let desc = SsTableDescriptor::parse_filename("nb-1-big-CompressionInfo.db").unwrap();
576        assert_eq!(desc.component, "CompressionInfo.db");
577    }
578
579    #[test]
580    fn test_descriptor_invalid_too_few_parts() {
581        assert!(SsTableDescriptor::parse_filename("nb-Data.db").is_err());
582        assert!(SsTableDescriptor::parse_filename("Data.db").is_err());
583    }
584
585    #[test]
586    fn test_descriptor_invalid_version_not_two_letters() {
587        assert!(SsTableDescriptor::parse_filename("nba-1-big-Data.db").is_err());
588        assert!(SsTableDescriptor::parse_filename("n-1-big-Data.db").is_err());
589    }
590
591    #[test]
592    fn test_descriptor_invalid_no_format_segment() {
593        assert!(SsTableDescriptor::parse_filename("nb-1-xxx-Data.db").is_err());
594    }
595
596    #[test]
597    fn test_descriptor_from_path() {
598        let path = PathBuf::from(
599            "test-data/datasets/sstables/test_basic/simple_table-6aa08200/nb-1-big-Data.db",
600        );
601        let desc = SsTableDescriptor::parse(&path).unwrap();
602        assert_eq!(desc.version, "nb");
603        assert_eq!(desc.format, SsTableFormat::Big);
604    }
605
606    // -----------------------------------------------------------------------
607    // BigVersionGates: nb (stock Cassandra 5.0 default)
608    // -----------------------------------------------------------------------
609
610    #[test]
611    fn test_big_nb_gates() {
612        let g = BigVersionGates::from_version("nb").unwrap();
613
614        // Gates that ARE set for nb
615        assert!(g.has_commit_log_lower_bound, "nb: hasCommitLogLowerBound");
616        assert!(g.has_commit_log_intervals, "nb: hasCommitLogIntervals");
617        assert!(g.has_max_compressed_length, "nb: hasMaxCompressedLength");
618        assert!(g.has_pending_repair, "nb: hasPendingRepair");
619        assert!(g.has_is_transient, "nb: hasIsTransient");
620        assert!(g.has_metadata_checksum, "nb: hasMetadataChecksum");
621        assert!(!g.has_old_bf_format, "nb: !hasOldBfFormat");
622        assert!(
623            g.has_originating_host_id,
624            "nb: hasOriginatingHostId (nb >= nb)"
625        );
626
627        // oa-only gates must be FALSE for nb
628        assert!(!g.has_improved_min_max, "nb: !hasImprovedMinMax (oa-only)");
629        assert!(
630            !g.has_partition_level_deletion_presence_marker,
631            "nb: !hasPartitionLevelDeletionPresenceMarker (oa-only)"
632        );
633        assert!(!g.has_key_range, "nb: !hasKeyRange (oa-only)");
634        assert!(
635            !g.has_uint_deletion_time,
636            "nb: !hasUIntDeletionTime (oa-only)"
637        );
638        assert!(
639            !g.has_token_space_coverage,
640            "nb: !hasTokenSpaceCoverage (oa-only)"
641        );
642    }
643
644    // -----------------------------------------------------------------------
645    // BigVersionGates: oa (Cassandra 5.0 native mode)
646    // -----------------------------------------------------------------------
647
648    #[test]
649    fn test_big_oa_gates() {
650        let g = BigVersionGates::from_version("oa").unwrap();
651
652        // All na+ gates still set
653        assert!(g.has_commit_log_lower_bound);
654        assert!(g.has_commit_log_intervals);
655        assert!(g.has_max_compressed_length);
656        assert!(g.has_pending_repair);
657        assert!(g.has_is_transient);
658        assert!(g.has_metadata_checksum);
659        assert!(!g.has_old_bf_format);
660        assert!(g.has_originating_host_id, "oa >= nb");
661
662        // oa-only gates must be TRUE for oa
663        assert!(g.has_improved_min_max, "oa: hasImprovedMinMax");
664        assert!(
665            g.has_partition_level_deletion_presence_marker,
666            "oa: hasPartitionLevelDeletionPresenceMarker"
667        );
668        assert!(g.has_key_range, "oa: hasKeyRange");
669        assert!(g.has_uint_deletion_time, "oa: hasUIntDeletionTime");
670        assert!(g.has_token_space_coverage, "oa: hasTokenSpaceCoverage");
671
672        // AccurateMinMax is deprecated in oa — should be FALSE
673        assert!(
674            !g.has_accurate_min_max,
675            "oa: hasAccurateMinMax MUST be false (deprecated)"
676        );
677        // LegacyMinMax also deprecated in oa
678        assert!(
679            !g.has_legacy_min_max,
680            "oa: hasLegacyMinMax MUST be false (deprecated)"
681        );
682    }
683
684    // -----------------------------------------------------------------------
685    // BigVersionGates: oa-only gates are NOT set on nb  (core correctness)
686    // -----------------------------------------------------------------------
687
688    #[test]
689    fn test_oa_only_gates_absent_from_nb() {
690        let nb = BigVersionGates::from_version("nb").unwrap();
691        let oa = BigVersionGates::from_version("oa").unwrap();
692
693        let oa_only_gate_names = [
694            (
695                "hasImprovedMinMax",
696                nb.has_improved_min_max,
697                oa.has_improved_min_max,
698            ),
699            (
700                "hasPartitionLevelDeletionPresenceMarker",
701                nb.has_partition_level_deletion_presence_marker,
702                oa.has_partition_level_deletion_presence_marker,
703            ),
704            ("hasKeyRange", nb.has_key_range, oa.has_key_range),
705            (
706                "hasUIntDeletionTime",
707                nb.has_uint_deletion_time,
708                oa.has_uint_deletion_time,
709            ),
710            (
711                "hasTokenSpaceCoverage",
712                nb.has_token_space_coverage,
713                oa.has_token_space_coverage,
714            ),
715        ];
716
717        for (name, nb_val, oa_val) in &oa_only_gate_names {
718            assert!(!nb_val, "nb.{} must be FALSE (oa-only gate)", name);
719            assert!(oa_val, "oa.{} must be TRUE", name);
720        }
721    }
722
723    // -----------------------------------------------------------------------
724    // BigVersionGates: hasOriginatingHostId straddle gate
725    // -----------------------------------------------------------------------
726
727    /// `hasOriginatingHostId` introduced in `me` (straddles letter boundary).
728    /// Source: BigFormat.java:400
729    ///   `version.compareTo("nb") >= 0 || version.matches("(m[e-z])")`
730    #[test]
731    fn test_originating_host_id_straddle_gate() {
732        // Must be FALSE for versions before me in the m-series
733        for v in &["ma", "mb", "mc", "md"] {
734            let g = BigVersionGates::from_version(v).unwrap();
735            assert!(
736                !g.has_originating_host_id,
737                "{}: hasOriginatingHostId must be FALSE",
738                v
739            );
740        }
741
742        // Must be TRUE for me..mz
743        for v in &["me", "mf", "mz"] {
744            let g = BigVersionGates::from_version(v).unwrap();
745            assert!(
746                g.has_originating_host_id,
747                "{}: hasOriginatingHostId must be TRUE (m[e-z] match)",
748                v
749            );
750        }
751
752        // Must be TRUE for na..nz (>= nb is lexicographically satisfied by the
753        // whole n-series above nb: na < nb so na must be FALSE)
754        let na = BigVersionGates::from_version("na").unwrap();
755        assert!(
756            !na.has_originating_host_id,
757            "na: hasOriginatingHostId must be FALSE (na < nb, not m[e-z])"
758        );
759
760        // nb and above: TRUE
761        for v in &["nb", "nc", "oa"] {
762            let g = BigVersionGates::from_version(v).unwrap();
763            assert!(
764                g.has_originating_host_id,
765                "{}: hasOriginatingHostId must be TRUE (>= nb)",
766                v
767            );
768        }
769    }
770
771    // -----------------------------------------------------------------------
772    // BigVersionGates: older versions
773    // -----------------------------------------------------------------------
774
/// `ma` is the oldest letter exercised here: only the legacy min/max and
/// old bloom-filter gates are on, every later-introduced gate is off.
#[test]
fn test_big_ma_gates() {
    let g = BigVersionGates::from_version("ma").unwrap();

    // Gates introduced later in the m-series.
    assert!(!g.has_commit_log_lower_bound);
    assert!(!g.has_commit_log_intervals);
    assert!(!g.has_accurate_min_max);
    assert!(!g.has_originating_host_id);

    // The two gates that ARE set for `ma`.
    assert!(g.has_legacy_min_max, "ma is in m[a-z]");
    assert!(g.has_old_bf_format, "ma: hasOldBfFormat");

    // Gates that only switch on from `na`. The original test checked just
    // the first of these; the rest are added so that "none of the later
    // features" is actually verified.
    assert!(!g.has_max_compressed_length);
    assert!(!g.has_pending_repair, "ma: hasPendingRepair is na+");
    assert!(!g.has_is_transient, "ma: hasIsTransient is na+");
    assert!(!g.has_metadata_checksum, "ma: hasMetadataChecksum is na+");

    // oa-only gates.
    assert!(!g.has_improved_min_max);
    assert!(
        !g.has_partition_level_deletion_presence_marker,
        "ma: hasPartitionLevelDeletionPresenceMarker is oa-only"
    );
    assert!(!g.has_key_range);
    assert!(!g.has_uint_deletion_time);
    assert!(!g.has_token_space_coverage);
}
791
/// `na` turns on the na-introduced gates but still lacks the originating
/// host id (which needs `nb`) and the oa-only improved min/max.
#[test]
fn test_big_na_gates() {
    let g = BigVersionGates::from_version("na").unwrap();

    // Inherited from the m-series.
    assert!(g.has_commit_log_lower_bound);
    assert!(g.has_commit_log_intervals);
    assert!(g.has_accurate_min_max, "na is in n[a-z]");
    assert!(g.has_legacy_min_max, "na is in n[a-z]");

    // Not yet available: requires `nb` (or `m[e-z]`).
    assert!(!g.has_originating_host_id, "na < nb");

    // Gates that first appear with `na`. The is-transient and
    // metadata-checksum checks were missing from the original test.
    assert!(g.has_max_compressed_length);
    assert!(g.has_pending_repair);
    assert!(g.has_is_transient, "na: hasIsTransient");
    assert!(g.has_metadata_checksum, "na: hasMetadataChecksum");
    assert!(!g.has_old_bf_format);

    assert!(!g.has_improved_min_max, "oa-only");
}
805
/// `md` has both min/max gates set but precedes the originating-host-id gate.
#[test]
fn test_big_md_gates() {
    let md = BigVersionGates::from_version("md").unwrap();

    // Both min/max gates are on for `md`.
    assert!(md.has_accurate_min_max, "md is m[d-z]");
    assert!(md.has_legacy_min_max, "md is m[a-z]");

    // One letter too early for the host-id gate.
    assert!(!md.has_originating_host_id, "md < me");
}
813
/// `me` keeps accurate min/max and is the first letter with the
/// originating-host-id gate.
#[test]
fn test_big_me_gates() {
    let me = BigVersionGates::from_version("me").unwrap();

    assert!(me.has_accurate_min_max, "me is m[d-z]");
    assert!(me.has_originating_host_id, "me matches m[e-z]");
}
820
821    // -----------------------------------------------------------------------
822    // BigVersionGates: isCompatible
823    // -----------------------------------------------------------------------
824
/// Known BIG letters report as compatible; a letter from a later major
/// series (`pa`) does not.
#[test]
fn test_big_is_compatible() {
    // Every version letter listed here must be accepted.
    let known_letters = ["ma", "mb", "mc", "md", "me", "na", "nb", "oa"];
    for v in known_letters.iter() {
        let gates = BigVersionGates::from_version(v).unwrap();
        assert!(gates.is_compatible(), "{} should be compatible", v);
    }

    // `pa` would be the next major after `oa`: its first letter 'p' is
    // greater than 'o', so it must be rejected while `oa` is current.
    let next_major = BigVersionGates::from_version("pa").unwrap();
    assert!(
        !next_major.is_compatible(),
        "pa is beyond current 'oa' major letter"
    );
}
840
/// `nb` and `oa` must land on opposite sides of the compat-mode / native
/// predicates.
#[test]
fn test_big_is_cassandra5_mode() {
    // `nb`: compat mode, not native.
    let compat = BigVersionGates::from_version("nb").unwrap();
    assert!(compat.is_cassandra5_compat_mode());
    assert!(!compat.is_cassandra5_native());

    // `oa`: native, not compat mode.
    let native = BigVersionGates::from_version("oa").unwrap();
    assert!(!native.is_cassandra5_compat_mode());
    assert!(native.is_cassandra5_native());
}
851
852    // -----------------------------------------------------------------------
853    // BigVersionGates::nb_fallback — must match from_version("nb") exactly
854    // -----------------------------------------------------------------------
855
/// Automated guard keeping `BigVersionGates::nb_fallback()` in sync with
/// `BigVersionGates::from_version("nb")`: the version string and every gate
/// compared below must be equal between the two values.
#[test]
fn test_nb_fallback_matches_from_version() {
    let parsed = BigVersionGates::from_version("nb").unwrap();
    let canned = BigVersionGates::nb_fallback();

    // One assertion per field so a failure points at the field that drifted.
    assert_eq!(canned.version, parsed.version);
    assert_eq!(
        canned.has_commit_log_lower_bound,
        parsed.has_commit_log_lower_bound
    );
    assert_eq!(
        canned.has_commit_log_intervals,
        parsed.has_commit_log_intervals
    );
    assert_eq!(canned.has_accurate_min_max, parsed.has_accurate_min_max);
    assert_eq!(canned.has_legacy_min_max, parsed.has_legacy_min_max);
    assert_eq!(
        canned.has_originating_host_id,
        parsed.has_originating_host_id
    );
    assert_eq!(
        canned.has_max_compressed_length,
        parsed.has_max_compressed_length
    );
    assert_eq!(canned.has_pending_repair, parsed.has_pending_repair);
    assert_eq!(canned.has_is_transient, parsed.has_is_transient);
    assert_eq!(canned.has_metadata_checksum, parsed.has_metadata_checksum);
    assert_eq!(canned.has_old_bf_format, parsed.has_old_bf_format);
    assert_eq!(canned.has_improved_min_max, parsed.has_improved_min_max);
    assert_eq!(
        canned.has_partition_level_deletion_presence_marker,
        parsed.has_partition_level_deletion_presence_marker
    );
    assert_eq!(canned.has_key_range, parsed.has_key_range);
    assert_eq!(
        canned.has_uint_deletion_time,
        parsed.has_uint_deletion_time
    );
    assert_eq!(
        canned.has_token_space_coverage,
        parsed.has_token_space_coverage
    );
}
905
906    // -----------------------------------------------------------------------
907    // BigVersionGates: invalid input
908    // -----------------------------------------------------------------------
909
/// Malformed version strings must be rejected with an error.
#[test]
fn test_big_invalid_version() {
    // One letter, three letters, uppercase, and empty input.
    let malformed = ["n", "nba", "NB", ""];
    for candidate in malformed.iter() {
        assert!(BigVersionGates::from_version(candidate).is_err());
    }
}
917
918    // -----------------------------------------------------------------------
919    // BtiVersionGates: da
920    // -----------------------------------------------------------------------
921
/// For `da`, every BTI gate is on except the two "old format" ones
/// (`has_old_bf_format`, `has_legacy_min_max`).
#[test]
fn test_bti_da_gates() {
    let da = BtiVersionGates::from_version("da").unwrap();

    assert!(da.has_commit_log_lower_bound);
    assert!(da.has_commit_log_intervals);
    assert!(da.has_max_compressed_length);
    assert!(da.has_pending_repair);
    assert!(da.has_is_transient);
    assert!(da.has_metadata_checksum);
    assert!(!da.has_old_bf_format, "da: !hasOldBfFormat");
    assert!(da.has_originating_host_id);

    // BtiFormat.java:363-366: hasAccurateMinMax() → true
    assert!(
        da.has_accurate_min_max,
        "da: hasAccurateMinMax (BtiFormat.java:363)"
    );
    // BtiFormat.java:368-371: hasLegacyMinMax() → false
    assert!(
        !da.has_legacy_min_max,
        "da: !hasLegacyMinMax (BtiFormat.java:368)"
    );

    assert!(da.has_improved_min_max);
    assert!(da.has_token_space_coverage);
    assert!(da.has_partition_level_deletion_presence_marker);
    assert!(da.has_key_range);
    assert!(da.has_uint_deletion_time);
}
949
/// BIG version letters are not valid BTI versions and must produce an error.
#[test]
fn test_bti_rejects_non_da() {
    let big_letters = ["nb", "oa", "na"];
    for candidate in big_letters.iter() {
        assert!(BtiVersionGates::from_version(candidate).is_err());
    }
}
956
957    // -----------------------------------------------------------------------
958    // VersionGates combined
959    // -----------------------------------------------------------------------
960
/// A sequential-id `nb` BIG filename resolves to BIG gates carrying `nb`.
#[test]
fn test_version_gates_from_path_nb() {
    let gates = VersionGates::from_path(&PathBuf::from("nb-1-big-Data.db")).unwrap();
    if let VersionGates::Big(big) = gates {
        assert_eq!(big.version, "nb");
    } else {
        panic!("Expected Big");
    }
}
970
/// An `oa` BIG filename resolves to BIG gates with the uint-deletion-time
/// gate set.
#[test]
fn test_version_gates_from_path_oa() {
    let gates = VersionGates::from_path(&PathBuf::from("oa-1-big-Data.db")).unwrap();
    let big = match gates {
        VersionGates::Bti(_) => panic!("Expected Big"),
        VersionGates::Big(inner) => inner,
    };
    assert_eq!(big.version, "oa");
    assert!(big.has_uint_deletion_time);
}
983
/// A `da` BTI filename resolves to the BTI variant carrying `da`.
#[test]
fn test_version_gates_from_path_da() {
    let gates = VersionGates::from_path(&PathBuf::from("da-1-bti-Partitions.db")).unwrap();
    if let VersionGates::Bti(bti) = gates {
        assert_eq!(bti.version, "da");
    } else {
        panic!("Expected Bti");
    }
}
993
/// Filenames with a UUID-based id (as found in the corpus) must parse into
/// gates just like sequential-id filenames do.
#[test]
fn test_version_gates_from_corpus_filename() {
    let corpus_file = PathBuf::from("nb-6aa08200a25111f0a3fef1a551383fb9-big-Data.db");
    let big = match VersionGates::from_path(&corpus_file).unwrap() {
        VersionGates::Bti(_) => panic!("Expected Big"),
        VersionGates::Big(inner) => inner,
    };

    assert_eq!(big.version, "nb");
    // `nb` must not carry the oa-only gates.
    assert!(!big.has_improved_min_max);
    assert!(!big.has_uint_deletion_time);
}
1009
1010    // -----------------------------------------------------------------------
1011    // Docker-generated fixture filenames
1012    // These filenames come from Cassandra 5.0.8 containers run with:
1013    //   storage_compatibility_mode: NONE  (for oa)
1014    //   sstable.selected_format: bti       (for da)
1015    // -----------------------------------------------------------------------
1016
/// Descriptor parsing for `oa-2-big-Data.db`, a filename produced by
/// Cassandra 5.0.8 running with `storage_compatibility_mode: NONE`.
#[test]
fn test_descriptor_docker_oa_sequential() {
    let parsed = SsTableDescriptor::parse_filename("oa-2-big-Data.db").unwrap();

    // Each dash-separated part of the filename lands in its own field.
    assert_eq!(parsed.version, "oa");
    assert_eq!(parsed.sstable_id, "2");
    assert_eq!(parsed.format, SsTableFormat::Big);
    assert_eq!(parsed.component, "Data.db");
}
1027
/// The Docker-generated `oa` fixture must expose all five oa-only gates as
/// TRUE and the two deprecated min/max gates as FALSE.
#[test]
fn test_gates_docker_oa_fixture() {
    let fixture = PathBuf::from("oa-2-big-Data.db");
    let big = match VersionGates::from_path(&fixture).unwrap() {
        VersionGates::Bti(_) => panic!("Expected Big gates for oa-2-big-Data.db"),
        VersionGates::Big(inner) => inner,
    };

    assert_eq!(big.version, "oa");

    // The five oa-only gates.
    assert!(big.has_improved_min_max, "oa fixture: hasImprovedMinMax");
    assert!(
        big.has_partition_level_deletion_presence_marker,
        "oa fixture: hasPartitionLevelDeletionPresenceMarker"
    );
    assert!(big.has_key_range, "oa fixture: hasKeyRange");
    assert!(
        big.has_uint_deletion_time,
        "oa fixture: hasUIntDeletionTime"
    );
    assert!(
        big.has_token_space_coverage,
        "oa fixture: hasTokenSpaceCoverage"
    );

    // Both older min/max gates are deprecated in `oa`.
    assert!(
        !big.has_accurate_min_max,
        "oa fixture: hasAccurateMinMax deprecated"
    );
    assert!(
        !big.has_legacy_min_max,
        "oa fixture: hasLegacyMinMax deprecated"
    );
}
1060
/// Descriptor parsing for `da-2-bti-Data.db`, a filename produced by
/// Cassandra 5.0.8 with the BTI format enabled.
#[test]
fn test_descriptor_docker_da_bti() {
    let parsed = SsTableDescriptor::parse_filename("da-2-bti-Data.db").unwrap();

    assert_eq!(parsed.version, "da");
    assert_eq!(parsed.sstable_id, "2");
    assert_eq!(parsed.format, SsTableFormat::Bti);
    assert_eq!(parsed.component, "Data.db");
}
1070
/// Descriptor parsing for `da-2-bti-Partitions.db`, the BTI-specific index
/// component.
#[test]
fn test_descriptor_docker_da_bti_partitions() {
    let parsed = SsTableDescriptor::parse_filename("da-2-bti-Partitions.db").unwrap();

    assert_eq!(parsed.version, "da");
    assert_eq!(parsed.format, SsTableFormat::Bti);
    assert_eq!(parsed.component, "Partitions.db");
}
1079
/// The Docker-generated `da` fixture must resolve to BTI gates with every
/// gate on except the old bloom-filter format and legacy min/max.
#[test]
fn test_gates_docker_da_fixture() {
    let fixture = PathBuf::from("da-2-bti-Data.db");
    let bti = match VersionGates::from_path(&fixture).unwrap() {
        VersionGates::Big(_) => panic!("Expected Bti gates for da-2-bti-Data.db"),
        VersionGates::Bti(inner) => inner,
    };

    assert_eq!(bti.version, "da");
    assert!(bti.has_improved_min_max, "da: hasImprovedMinMax");
    assert!(bti.has_key_range, "da: hasKeyRange");
    assert!(bti.has_uint_deletion_time, "da: hasUIntDeletionTime");
    assert!(bti.has_token_space_coverage, "da: hasTokenSpaceCoverage");
    assert!(
        bti.has_partition_level_deletion_presence_marker,
        "da: hasPartitionLevelDeletionPresenceMarker"
    );
    assert!(!bti.has_old_bf_format, "da: !hasOldBfFormat");
    assert!(bti.has_originating_host_id, "da: hasOriginatingHostId");

    // BtiFormat.java:363-371
    assert!(bti.has_accurate_min_max, "da: hasAccurateMinMax");
    assert!(!bti.has_legacy_min_max, "da: !hasLegacyMinMax");
}
1104}