// Source: cqlite_core/version_hints.rs

//! Version hint resolution with a strict precedence chain
//!
//! This module implements the version hint precedence system defined in Issue #130.
//! It provides authoritative version detection with explicit source tracking for
//! diagnostic purposes.
//!
//! ## Precedence Chain
//!
//! 1. User flag (--cassandra-version) - highest priority
//! 2. SSTable metadata (from individual SSTable files) - currently a stub that
//!    never yields a version, so resolution always falls through to level 3
//! 3. Dataset metadata.yml (from test data configuration)
//! 4. Unknown - fallback when no source provides version information
//!
//! ## No Heuristics Mandate (Issue #28)
//!
//! This module follows the no-heuristics mandate strictly:
//! - Version information is only extracted from authoritative metadata sources
//! - No guessing or inference based on file formats or structures
//! - Missing version information results in "Unknown" status, not a guess
//!
//! ## Usage
//!
//! ```rust,no_run
//! use cqlite_core::version_hints::{VersionHintResolver, VersionSource};
//! use std::path::Path;
//! use std::sync::Arc;
//! use cqlite_core::{Config, Platform};
//!
//! # tokio_test::block_on(async {
//! let config = Config::default();
//! let platform = Arc::new(Platform::new(&config).await.unwrap());
//!
//! // Resolve version with user override
//! let resolved = VersionHintResolver::resolve(
//!     Some("5.0".to_string()),
//!     Path::new("/path/to/sstable"),
//!     platform.clone(),
//! ).await.unwrap();
//!
//! assert_eq!(resolved.source, VersionSource::UserFlag);
//! assert_eq!(resolved.version, Some("5.0".to_string()));
//! # });
//! ```
44
45use crate::{Error, Result};
46use serde::Deserialize;
47use std::path::Path;
48use std::sync::Arc;
49
/// Source of version information in the precedence chain.
///
/// Variants are declared from highest to lowest priority; the numeric
/// ranking is available through [`VersionSource::precedence`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VersionSource {
    /// User-provided flag (--cassandra-version)
    UserFlag,
    /// SSTable metadata (from Data.db or Statistics.db)
    SSTableMetadata,
    /// Dataset metadata.yml
    DatasetMetadata,
    /// No version information available
    Unknown,
}

impl VersionSource {
    /// Returns the precedence level of this source (lower is higher priority).
    ///
    /// `Unknown` is pinned to `255` so it ranks below every real source.
    #[must_use]
    pub const fn precedence(&self) -> u8 {
        match *self {
            VersionSource::UserFlag => 0,
            VersionSource::SSTableMetadata => 1,
            VersionSource::DatasetMetadata => 2,
            VersionSource::Unknown => 255,
        }
    }

    /// Returns a human-readable description of this source.
    #[must_use]
    pub const fn description(&self) -> &'static str {
        match *self {
            VersionSource::UserFlag => "User-provided flag (--cassandra-version)",
            VersionSource::SSTableMetadata => "SSTable metadata",
            VersionSource::DatasetMetadata => "Dataset metadata.yml",
            VersionSource::Unknown => "Unknown (no version information available)",
        }
    }
}

/// Formats the source using the same text as [`VersionSource::description`],
/// so it can be embedded directly in diagnostics via `{}`.
impl std::fmt::Display for VersionSource {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.description())
    }
}
84
85/// Resolved version information with source tracking
86#[derive(Debug, Clone, PartialEq, Eq)]
87pub struct ResolvedVersion {
88    /// The resolved version string (e.g., "5.0", "4.0")
89    pub version: Option<String>,
90    /// The source that provided this version information
91    pub source: VersionSource,
92}
93
94impl ResolvedVersion {
95    /// Create a new resolved version
96    pub fn new(version: Option<String>, source: VersionSource) -> Self {
97        Self { version, source }
98    }
99
100    /// Check if a version was successfully resolved
101    pub fn is_known(&self) -> bool {
102        self.version.is_some()
103    }
104
105    /// Get the version string or return "unknown"
106    pub fn version_or_unknown(&self) -> &str {
107        self.version.as_deref().unwrap_or("unknown")
108    }
109}
110
111/// Metadata.yml structure (partial - only version field)
112#[derive(Debug, Clone, Deserialize)]
113struct DatasetMetadata {
114    cassandra_version: Option<String>,
115}
116
117/// Version hint resolution engine
118pub struct VersionHintResolver;
119
120impl VersionHintResolver {
121    /// Resolve version using the precedence chain
122    ///
123    /// # Arguments
124    ///
125    /// * `user_version` - User-provided version flag (highest priority)
126    /// * `sstable_path` - Path to SSTable directory or Data.db file
127    /// * `platform` - Platform abstraction for file I/O
128    ///
129    /// # Returns
130    ///
131    /// Returns a `ResolvedVersion` with the version string and source.
132    /// If no version can be determined, returns `Unknown` source with `None` version.
133    ///
134    /// # Errors
135    ///
136    /// Returns an error only for fatal I/O errors (not for missing metadata files).
137    /// Missing metadata.yml is not an error - it simply moves to the next precedence level.
138    pub async fn resolve(
139        user_version: Option<String>,
140        sstable_path: &Path,
141        platform: Arc<crate::Platform>,
142    ) -> Result<ResolvedVersion> {
143        // Precedence level 0: User flag
144        if let Some(version) = user_version {
145            return Ok(ResolvedVersion::new(Some(version), VersionSource::UserFlag));
146        }
147
148        // Precedence level 1: SSTable metadata
149        // TODO(Issue #130): Parse SSTable metadata when format spec is available
150        // Current Statistics.db doesn't contain version information in Cassandra 5.0
151        // This will be implemented when the format is extended or alternative sources
152        // (e.g., Data.db header metadata) are identified.
153        if let Some(version) = Self::parse_sstable_metadata(sstable_path, platform.clone()).await? {
154            return Ok(ResolvedVersion::new(
155                Some(version),
156                VersionSource::SSTableMetadata,
157            ));
158        }
159
160        // Precedence level 2: Dataset metadata.yml
161        if let Some(version) = Self::parse_dataset_metadata(sstable_path, platform).await? {
162            return Ok(ResolvedVersion::new(
163                Some(version),
164                VersionSource::DatasetMetadata,
165            ));
166        }
167
168        // Fallback: Unknown
169        Ok(ResolvedVersion::new(None, VersionSource::Unknown))
170    }
171
172    /// Parse version from SSTable metadata (Statistics.db or Data.db header)
173    ///
174    /// # Implementation Note
175    ///
176    /// This is currently a stub that always returns `Ok(None)` because:
177    /// - Cassandra 5.0 Statistics.db does not contain version information
178    /// - Data.db header version field is the SSTable format version, not Cassandra version
179    /// - No authoritative metadata source for Cassandra version in SSTable files yet
180    ///
181    /// This will be implemented when:
182    /// - Extended metadata format is added to Statistics.db
183    /// - Alternative authoritative source is identified
184    /// - Upstream Cassandra adds version metadata to SSTable files
185    async fn parse_sstable_metadata(
186        _sstable_path: &Path,
187        _platform: Arc<crate::Platform>,
188    ) -> Result<Option<String>> {
189        // TODO(Issue #130): Implement SSTable metadata parsing
190        // Current Cassandra 5.0 SSTable format does not include version in metadata
191        Ok(None)
192    }
193
194    /// Parse version from dataset metadata.yml
195    ///
196    /// This searches for metadata.yml in the following order:
197    /// 1. Same directory as SSTable file
198    /// 2. Parent directory (for sstables/ subdirectory layout)
199    /// 3. Grandparent directory (for nested dataset structures)
200    ///
201    /// Missing metadata.yml is NOT an error - it returns `Ok(None)`.
202    /// Invalid YAML format IS an error and returns `Err(Error::Parse(...))`.
203    async fn parse_dataset_metadata(
204        sstable_path: &Path,
205        platform: Arc<crate::Platform>,
206    ) -> Result<Option<String>> {
207        // Search for metadata.yml in current directory, parent, and grandparent
208        let search_paths = [
209            sstable_path.to_path_buf(),
210            sstable_path.parent().unwrap_or(sstable_path).to_path_buf(),
211            sstable_path
212                .parent()
213                .and_then(|p| p.parent())
214                .unwrap_or(sstable_path)
215                .to_path_buf(),
216        ];
217
218        for base_path in &search_paths {
219            let metadata_path = base_path.join("metadata.yml");
220
221            // Check if file exists using platform abstraction
222            if !platform.fs().exists(&metadata_path).await? {
223                continue;
224            }
225
226            // Read file contents
227            match platform.fs().read_file(&metadata_path).await {
228                Ok(contents) => {
229                    // Parse YAML
230                    let contents_str = String::from_utf8(contents).map_err(|e| {
231                        Error::parse(format!(
232                            "metadata.yml at {} is not valid UTF-8: {}",
233                            metadata_path.display(),
234                            e
235                        ))
236                    })?;
237
238                    let metadata: DatasetMetadata =
239                        serde_yaml::from_str(&contents_str).map_err(|e| {
240                            Error::parse(format!(
241                                "Failed to parse metadata.yml at {}: {}",
242                                metadata_path.display(),
243                                e
244                            ))
245                        })?;
246
247                    // Return version if present
248                    if let Some(version) = metadata.cassandra_version {
249                        return Ok(Some(version));
250                    }
251
252                    // metadata.yml found but no version field - continue search
253                    continue;
254                }
255                Err(e) => {
256                    // Distinguish between "not found" and actual I/O errors
257                    // Use ErrorKind instead of string matching for robustness
258                    match &e {
259                        Error::Io(io_err) if io_err.kind() == std::io::ErrorKind::NotFound => {
260                            // File not found - continue searching other paths
261                            continue;
262                        }
263                        _ => {
264                            // Real I/O error - propagate it
265                            return Err(e);
266                        }
267                    }
268                }
269            }
270        }
271
272        // No metadata.yml found in any search path - not an error
273        Ok(None)
274    }
275}
276
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Config;
    use std::path::Path;
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Creates a scratch directory and a default-configured platform.
    ///
    /// The `TempDir` guard is returned so the directory stays alive for the
    /// duration of the calling test.
    async fn fixture() -> (TempDir, Arc<crate::Platform>) {
        let temp_dir = TempDir::new().unwrap();
        let config = Config::default();
        let platform = Arc::new(crate::Platform::new(&config).await.unwrap());
        (temp_dir, platform)
    }

    /// Writes `contents` to `<dir>/metadata.yml` through the platform file system.
    async fn write_metadata(platform: &crate::Platform, dir: &Path, contents: &[u8]) {
        let metadata_path = dir.join("metadata.yml");
        platform
            .fs()
            .write_file(&metadata_path, contents)
            .await
            .unwrap();
    }

    #[test]
    fn test_version_source_precedence() {
        assert!(VersionSource::UserFlag.precedence() < VersionSource::SSTableMetadata.precedence());
        assert!(
            VersionSource::SSTableMetadata.precedence()
                < VersionSource::DatasetMetadata.precedence()
        );
        assert!(VersionSource::DatasetMetadata.precedence() < VersionSource::Unknown.precedence());
    }

    #[test]
    fn test_version_source_description() {
        assert_eq!(
            VersionSource::UserFlag.description(),
            "User-provided flag (--cassandra-version)"
        );
        assert_eq!(
            VersionSource::SSTableMetadata.description(),
            "SSTable metadata"
        );
        assert_eq!(
            VersionSource::DatasetMetadata.description(),
            "Dataset metadata.yml"
        );
        assert_eq!(
            VersionSource::Unknown.description(),
            "Unknown (no version information available)"
        );
    }

    #[test]
    fn test_resolved_version_is_known() {
        let known = ResolvedVersion::new(Some("5.0".to_string()), VersionSource::UserFlag);
        assert!(known.is_known());

        let unknown = ResolvedVersion::new(None, VersionSource::Unknown);
        assert!(!unknown.is_known());
    }

    #[test]
    fn test_resolved_version_or_unknown() {
        let known = ResolvedVersion::new(Some("5.0".to_string()), VersionSource::UserFlag);
        assert_eq!(known.version_or_unknown(), "5.0");

        let unknown = ResolvedVersion::new(None, VersionSource::Unknown);
        assert_eq!(unknown.version_or_unknown(), "unknown");
    }

    #[tokio::test]
    async fn test_user_flag_precedence() {
        let (temp_dir, platform) = fixture().await;

        // User flag should override everything
        let resolved =
            VersionHintResolver::resolve(Some("5.0-user".to_string()), temp_dir.path(), platform)
                .await
                .unwrap();

        assert_eq!(resolved.source, VersionSource::UserFlag);
        assert_eq!(resolved.version, Some("5.0-user".to_string()));
        assert!(resolved.is_known());
    }

    #[tokio::test]
    async fn test_unknown_when_no_sources() {
        let (temp_dir, platform) = fixture().await;

        // No user flag, no metadata.yml
        let resolved = VersionHintResolver::resolve(None, temp_dir.path(), platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::Unknown);
        assert_eq!(resolved.version, None);
        assert!(!resolved.is_known());
        assert_eq!(resolved.version_or_unknown(), "unknown");
    }

    #[tokio::test]
    async fn test_metadata_yml_parsing() {
        let (temp_dir, platform) = fixture().await;

        // Create metadata.yml with version
        write_metadata(
            &platform,
            temp_dir.path(),
            b"cassandra_version: \"5.0\"\nkeyspaces: []\n",
        )
        .await;

        // Resolve should find metadata.yml
        let resolved = VersionHintResolver::resolve(None, temp_dir.path(), platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::DatasetMetadata);
        assert_eq!(resolved.version, Some("5.0".to_string()));
    }

    #[tokio::test]
    async fn test_metadata_yml_parent_directory() {
        let (temp_dir, platform) = fixture().await;

        // Create metadata.yml in parent directory
        write_metadata(
            &platform,
            temp_dir.path(),
            b"cassandra_version: \"4.0\"\nkeyspaces: []\n",
        )
        .await;

        // Create subdirectory for SSTable
        let sstable_dir = temp_dir.path().join("sstables");
        platform.fs().create_dir(&sstable_dir).await.unwrap();

        // Resolve from subdirectory should find parent metadata.yml
        let resolved = VersionHintResolver::resolve(None, &sstable_dir, platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::DatasetMetadata);
        assert_eq!(resolved.version, Some("4.0".to_string()));
    }

    #[tokio::test]
    async fn test_metadata_yml_grandparent_directory() {
        let (temp_dir, platform) = fixture().await;

        // metadata.yml lives two levels above the SSTable directory
        write_metadata(
            &platform,
            temp_dir.path(),
            b"cassandra_version: \"4.1\"\nkeyspaces: []\n",
        )
        .await;

        // Create each level separately; create_dir is not assumed to be recursive
        let table_dir = temp_dir.path().join("table");
        platform.fs().create_dir(&table_dir).await.unwrap();
        let sstable_dir = table_dir.join("sstables");
        platform.fs().create_dir(&sstable_dir).await.unwrap();

        let resolved = VersionHintResolver::resolve(None, &sstable_dir, platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::DatasetMetadata);
        assert_eq!(resolved.version, Some("4.1".to_string()));
    }

    #[tokio::test]
    async fn test_versionless_metadata_yml_falls_through_to_parent() {
        let (temp_dir, platform) = fixture().await;

        // Parent carries the version...
        write_metadata(
            &platform,
            temp_dir.path(),
            b"cassandra_version: \"4.0\"\nkeyspaces: []\n",
        )
        .await;

        // ...while the closer metadata.yml has no cassandra_version key
        let sstable_dir = temp_dir.path().join("sstables");
        platform.fs().create_dir(&sstable_dir).await.unwrap();
        write_metadata(&platform, &sstable_dir, b"keyspaces: []\n").await;

        let resolved = VersionHintResolver::resolve(None, &sstable_dir, platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::DatasetMetadata);
        assert_eq!(resolved.version, Some("4.0".to_string()));
    }

    #[tokio::test]
    async fn test_metadata_yml_invalid_yaml() {
        let (temp_dir, platform) = fixture().await;

        // Create invalid YAML
        write_metadata(&platform, temp_dir.path(), b"invalid: yaml: syntax: error:").await;

        // Should return parse error
        let result = VersionHintResolver::resolve(None, temp_dir.path(), platform).await;
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Failed to parse metadata.yml"));
    }

    #[tokio::test]
    async fn test_metadata_yml_invalid_utf8() {
        let (temp_dir, platform) = fixture().await;

        // 0xFF can never appear in well-formed UTF-8
        write_metadata(&platform, temp_dir.path(), &[0xff, 0xfe, 0xfd]).await;

        let result = VersionHintResolver::resolve(None, temp_dir.path(), platform).await;
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("is not valid UTF-8"));
    }

    #[tokio::test]
    async fn test_metadata_yml_missing_version_field() {
        let (temp_dir, platform) = fixture().await;

        // Create metadata.yml without cassandra_version field
        write_metadata(&platform, temp_dir.path(), b"keyspaces: []\n").await;

        // Should fall back to Unknown (missing field is not an error)
        let resolved = VersionHintResolver::resolve(None, temp_dir.path(), platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::Unknown);
        assert_eq!(resolved.version, None);
    }

    #[tokio::test]
    async fn test_user_flag_overrides_metadata_yml() {
        let (temp_dir, platform) = fixture().await;

        // Create metadata.yml with version 5.0
        write_metadata(
            &platform,
            temp_dir.path(),
            b"cassandra_version: \"5.0\"\nkeyspaces: []\n",
        )
        .await;

        // User flag should override metadata.yml
        let resolved = VersionHintResolver::resolve(
            Some("4.0-override".to_string()),
            temp_dir.path(),
            platform,
        )
        .await
        .unwrap();

        assert_eq!(resolved.source, VersionSource::UserFlag);
        assert_eq!(resolved.version, Some("4.0-override".to_string()));
    }

    #[tokio::test]
    async fn test_not_found_error_robustness() {
        // With no metadata.yml anywhere in the search path, resolution must
        // succeed with `Unknown` rather than surface a "not found" error.
        //
        // NOTE: the existence check short-circuits before any read here, so
        // the ErrorKind::NotFound branch of the read path (a file vanishing
        // between the check and the read) is not reached by this test;
        // covering it would need a platform that can inject that race.

        let (temp_dir, platform) = fixture().await;

        // No metadata.yml exists - should continue search and return Unknown
        let resolved = VersionHintResolver::resolve(None, temp_dir.path(), platform)
            .await
            .unwrap();

        assert_eq!(resolved.source, VersionSource::Unknown);
        assert_eq!(resolved.version, None);
    }
}