use std::path::{Path, PathBuf};
use crate::error::{Error, Result};
/// A keyspace directory together with the table directories found inside it.
///
/// `Scanner::scan` only records a keyspace here when at least one table
/// directory was found beneath it.
#[derive(Debug, Clone)]
pub struct KeyspaceInfo {
/// Name of the keyspace directory (lossily converted to UTF-8).
pub name: String,
/// One entry per sub-directory of the keyspace directory.
pub tables: Vec<TableInfo>,
}
/// A single table directory discovered beneath a keyspace directory.
#[derive(Debug, Clone)]
pub struct TableInfo {
/// `"<keyspace>.<name>"`.
pub qualified_name: String,
/// Name of the keyspace directory that contains this table directory.
pub keyspace: String,
/// Table directory name up to (not including) its first `-`.
pub name: String,
/// Number of entries directly inside the table directory whose file name
/// ends with `-Data.db` or is exactly `Data.db`.
pub sstable_count: usize,
/// Path of the table directory itself.
pub path: PathBuf,
}
/// Aggregated outcome of `Scanner::scan`.
#[derive(Debug, Clone)]
pub struct ScanResult {
/// Names of every scanned keyspace directory. Directories whose name starts
/// with `system` are skipped; keyspaces without any table directory are
/// still listed here.
pub keyspaces: Vec<String>,
/// Qualified (`"<keyspace>.<table>"`) names of every table directory found.
pub tables: Vec<String>,
/// Total number of SSTable data files counted across all tables.
pub sstable_count: usize,
/// Per-keyspace detail; only keyspaces with at least one table directory.
pub keyspace_info: Vec<KeyspaceInfo>,
/// Human-readable warnings produced by the scan, e.g. the directory-layout
/// hint emitted when no table directory has the `name-uuid` form.
pub warnings: Vec<String>,
}
/// Returns `true` when `dir_name` looks like `<anything>-<32 hex digits>`.
///
/// Only the text after the *last* `-` is inspected: it must be exactly 32
/// bytes long and consist solely of ASCII hexadecimal digits (either case).
/// A name without any `-` never matches.
fn has_cassandra_table_uuid_suffix(dir_name: &str) -> bool {
    const UUID_HEX_LEN: usize = 32;

    match dir_name.rsplit_once('-') {
        Some((_, tail)) => {
            tail.len() == UUID_HEX_LEN && tail.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
/// Walks a data directory laid out as `<data-dir>/<keyspace>/<table>-<uuid>/`
/// and reports the keyspaces, tables and SSTable data files it finds.
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Root directory whose immediate sub-directories are treated as keyspaces.
    data_dir: PathBuf,
    /// Caller-supplied version; when set it is returned as-is by
    /// `resolve_version` without consulting the file system.
    version_hint: Option<String>,
}
impl Scanner {
pub fn new(data_dir: &Path, version_hint: Option<String>) -> Self {
Self {
data_dir: data_dir.to_path_buf(),
version_hint,
}
}
pub fn scan(&self) -> Result<ScanResult> {
let mut keyspaces = Vec::new();
let mut tables = Vec::new();
let mut sstable_count = 0;
let mut keyspace_info = Vec::new();
let entries = std::fs::read_dir(&self.data_dir).map_err(|e| {
Error::Io(std::io::Error::new(
e.kind(),
format!(
"Failed to read data directory {}: {}",
self.data_dir.display(),
e
),
))
})?;
for entry in entries.flatten() {
if !entry.path().is_dir() {
continue;
}
let keyspace_name = entry.file_name().to_string_lossy().to_string();
if keyspace_name.starts_with("system") {
continue;
}
keyspaces.push(keyspace_name.clone());
let mut keyspace_tables = Vec::new();
if let Ok(table_entries) = std::fs::read_dir(entry.path()) {
for table_entry in table_entries.flatten() {
if !table_entry.path().is_dir() {
continue;
}
let table_dir_name = table_entry.file_name().to_string_lossy().to_string();
let table_name = table_dir_name
.split('-')
.next()
.unwrap_or(&table_dir_name)
.to_string();
let qualified_name = format!("{}.{}", keyspace_name, table_name);
let mut table_sstable_count = 0;
if let Ok(sstable_files) = std::fs::read_dir(table_entry.path()) {
for sstable_file in sstable_files.flatten() {
let file_name = sstable_file.file_name().to_string_lossy().to_string();
if file_name.ends_with("-Data.db") || file_name == "Data.db" {
table_sstable_count += 1;
sstable_count += 1;
}
}
}
tables.push(qualified_name.clone());
keyspace_tables.push(TableInfo {
qualified_name,
keyspace: keyspace_name.clone(),
name: table_name,
sstable_count: table_sstable_count,
path: table_entry.path(),
});
}
}
if !keyspace_tables.is_empty() {
keyspace_info.push(KeyspaceInfo {
name: keyspace_name,
tables: keyspace_tables,
});
}
}
let mut warnings = Vec::new();
if !tables.is_empty() {
let valid_table_dir_count = keyspace_info
.iter()
.flat_map(|k| &k.tables)
.filter(|t| {
t.path
.file_name()
.map(|n| has_cassandra_table_uuid_suffix(&n.to_string_lossy()))
.unwrap_or(false)
})
.count();
if valid_table_dir_count == 0 {
warnings.push(format!(
"Warning: No table directories with expected 'name-uuid' format found.\n\
The --data-dir may be pointing to the wrong directory level.\n\
Current path: {}\n\
Expected structure: <data-dir>/<keyspace>/<table>-<uuid>/\n\
Hint: Try using a subdirectory like: {}/sstables or {}/data",
self.data_dir.display(),
self.data_dir.display(),
self.data_dir.display()
));
}
}
Ok(ScanResult {
keyspaces,
tables,
sstable_count,
keyspace_info,
warnings,
})
}
pub fn resolve_version(&self, _scan_result: &ScanResult) -> Result<Option<String>> {
if let Some(hint) = &self.version_hint {
return Ok(Some(hint.clone()));
}
let metadata_path = self.data_dir.join("metadata.yml");
if metadata_path.exists() {
if let Ok(content) = std::fs::read_to_string(&metadata_path) {
for line in content.lines() {
if line.trim().starts_with("version:") {
let version = line
.trim()
.strip_prefix("version:")
.unwrap_or("")
.trim()
.trim_matches('"')
.trim_matches('\'')
.to_string();
if !version.is_empty() {
return Ok(Some(version));
}
}
}
}
}
Ok(Some("unknown".to_string()))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates `<root>/<keyspace>/<table_dir>/` and writes `payload` into each
    /// of `files` inside that directory.
    fn table_with_files(
        root: &Path,
        keyspace: &str,
        table_dir: &str,
        files: &[&str],
        payload: &[u8],
    ) {
        let dir = root.join(keyspace).join(table_dir);
        fs::create_dir_all(&dir).unwrap();
        for file in files {
            fs::write(dir.join(file), payload).unwrap();
        }
    }

    /// Runs a scan over `root` with no version hint and unwraps the result.
    fn scan_dir(root: &Path) -> ScanResult {
        Scanner::new(root, None).scan().unwrap()
    }

    #[test]
    fn test_scanner_empty_directory() {
        let root = TempDir::new().unwrap();

        let scan = scan_dir(root.path());

        assert_eq!(scan.sstable_count, 0);
        assert!(scan.keyspaces.is_empty());
        assert!(scan.tables.is_empty());
        assert!(scan.keyspace_info.is_empty());
    }

    #[test]
    fn test_scanner_with_structure() {
        let root = TempDir::new().unwrap();
        table_with_files(
            root.path(),
            "test_ks",
            "users-6aa08200a25111f0a3fef1a551383fb9",
            &["na-1-big-Data.db", "na-2-big-Data.db"],
            b"mock data",
        );

        let scan = scan_dir(root.path());

        assert_eq!(scan.sstable_count, 2);
        assert_eq!(scan.keyspaces.len(), 1);
        assert!(scan.keyspaces.iter().any(|k| k == "test_ks"));
        assert_eq!(scan.tables.len(), 1);
        assert!(scan.tables[0].starts_with("test_ks.users"));

        assert_eq!(scan.keyspace_info.len(), 1);
        let keyspace = &scan.keyspace_info[0];
        assert_eq!(keyspace.name, "test_ks");
        assert_eq!(keyspace.tables.len(), 1);
        assert_eq!(keyspace.tables[0].sstable_count, 2);
        assert!(scan.warnings.is_empty());
    }

    #[test]
    fn test_scanner_skips_system_keyspaces() {
        let root = TempDir::new().unwrap();
        table_with_files(
            root.path(),
            "system",
            "local-6aa08200a25111f0a3fef1a551383fb9",
            &["Data.db"],
            b"mock",
        );
        table_with_files(
            root.path(),
            "user_ks",
            "table-7bb09311b36222f1b4fef2b662494fc0",
            &["na-1-big-Data.db"],
            b"mock",
        );

        let scan = scan_dir(root.path());

        assert_eq!(scan.keyspaces.len(), 1);
        assert!(scan.keyspaces.iter().any(|k| k == "user_ks"));
        assert!(scan.keyspaces.iter().all(|k| !k.starts_with("system")));
        assert_eq!(scan.sstable_count, 1);
        assert!(scan.warnings.is_empty());
    }

    #[test]
    fn test_resolve_version_with_hint() {
        let root = TempDir::new().unwrap();
        let scanner = Scanner::new(root.path(), Some("5.0".to_string()));

        let scan = scanner.scan().unwrap();

        assert_eq!(
            scanner.resolve_version(&scan).unwrap(),
            Some("5.0".to_string())
        );
    }

    #[test]
    fn test_resolve_version_from_metadata_yml() {
        let root = TempDir::new().unwrap();
        fs::write(
            root.path().join("metadata.yml"),
            "version: 5.0.1\nother: field\n",
        )
        .unwrap();
        let scanner = Scanner::new(root.path(), None);

        let scan = scanner.scan().unwrap();

        assert_eq!(
            scanner.resolve_version(&scan).unwrap(),
            Some("5.0.1".to_string())
        );
    }

    #[test]
    fn test_resolve_version_unknown() {
        let root = TempDir::new().unwrap();
        let scanner = Scanner::new(root.path(), None);

        let scan = scanner.scan().unwrap();

        assert_eq!(
            scanner.resolve_version(&scan).unwrap(),
            Some("unknown".to_string())
        );
    }

    #[test]
    fn test_scanner_multiple_keyspaces() {
        let root = TempDir::new().unwrap();
        let names = ["keyspace1", "keyspace2", "keyspace3"];
        let uuids = [
            "6aa08200a25111f0a3fef1a551383fb9",
            "7bb09311b36222f1b4fef2b662494fc0",
            "8cc0a422c47333f2c5fef3c773505fd1",
        ];
        for (ks_name, uuid) in names.iter().zip(uuids.iter()) {
            table_with_files(
                root.path(),
                ks_name,
                &format!("{}_table-{}", ks_name, uuid),
                &["na-1-big-Data.db"],
                b"mock",
            );
        }

        let scan = scan_dir(root.path());

        assert_eq!(scan.keyspaces.len(), 3);
        assert_eq!(scan.tables.len(), 3);
        assert_eq!(scan.sstable_count, 3);
        assert!(scan.warnings.is_empty());
    }

    #[test]
    fn test_scanner_warns_on_invalid_table_directory_format() {
        // Pointing one level too high: "sstables" is mistaken for a keyspace
        // and the real keyspace directories for (UUID-less) tables.
        let root = TempDir::new().unwrap();
        for ks_name in ["test_basic", "test_collections"].iter() {
            table_with_files(
                root.path(),
                "sstables",
                ks_name,
                &["na-1-big-Data.db"],
                b"mock",
            );
        }

        let scan = scan_dir(root.path());

        assert!(!scan.tables.is_empty());
        assert!(!scan.warnings.is_empty());
        let first_warning = &scan.warnings[0];
        assert!(first_warning.contains("name-uuid"));
        assert!(first_warning.contains("wrong directory level"));
    }

    #[test]
    fn test_scanner_invalid_directory() {
        let outcome = Scanner::new(Path::new("/nonexistent/path"), None).scan();

        match outcome {
            Err(Error::Io(io_err)) => {
                assert_eq!(io_err.kind(), std::io::ErrorKind::NotFound)
            }
            _ => panic!("Expected Io error"),
        }
    }

    #[test]
    fn test_has_cassandra_table_uuid_suffix() {
        let accepted = [
            "simple_table-6aa08200a25111f0a3fef1a551383fb9",
            "users-0123456789abcdef0123456789abcdef",
            "my_table-ABCDEF0123456789ABCDEF0123456789",
        ];
        for dir_name in accepted.iter() {
            assert!(
                has_cassandra_table_uuid_suffix(dir_name),
                "expected UUID suffix to be detected in {:?}",
                dir_name
            );
        }

        let rejected = [
            "test_basic",
            "users",
            "users-abc123",
            "table-456",
            "table-6aa08200a25111f0a3fef1a551383fb9extra",
            "table-6aa08200a25111f0a3fef1a551383fgz",
        ];
        for dir_name in rejected.iter() {
            assert!(
                !has_cassandra_table_uuid_suffix(dir_name),
                "expected no UUID suffix to be detected in {:?}",
                dir_name
            );
        }
    }
}