1use std::path::{Path, PathBuf};
7
8use crate::error::{Error, Result};
9
10#[derive(Debug, Clone)]
12pub struct KeyspaceInfo {
13 pub name: String,
15 pub tables: Vec<TableInfo>,
17}
18
/// Description of a single table directory discovered during a scan.
#[derive(Clone, Debug)]
pub struct TableInfo {
    /// Table name in `<keyspace>.<table>` form.
    pub qualified_name: String,

    /// Name of the keyspace directory that contains this table.
    pub keyspace: String,

    /// Table name: the directory name up to (not including) its first `-`.
    pub name: String,

    /// Number of files in the table directory that are named `Data.db` or
    /// whose name ends with `-Data.db`.
    pub sstable_count: usize,

    /// Path of the table directory itself.
    pub path: PathBuf,
}
33
34#[derive(Debug, Clone)]
36pub struct ScanResult {
37 pub keyspaces: Vec<String>,
39 pub tables: Vec<String>,
41 pub sstable_count: usize,
43 pub keyspace_info: Vec<KeyspaceInfo>,
45 pub warnings: Vec<String>,
47}
48
/// Reports whether `dir_name` ends in `-<id>`, where `<id>` is exactly 32
/// ASCII hexadecimal digits (either case).
///
/// Only the text after the *last* `-` is inspected; a name without any `-`
/// never matches.
fn has_cassandra_table_uuid_suffix(dir_name: &str) -> bool {
    match dir_name.rsplit_once('-') {
        Some((_, tail)) => tail.len() == 32 && tail.bytes().all(|b| b.is_ascii_hexdigit()),
        None => false,
    }
}
68
/// Walks a data directory laid out as `<data-dir>/<keyspace>/<table>-<uuid>/`
/// and reports the keyspaces, tables and SSTable data files it finds.
pub struct Scanner {
    /// Root directory whose immediate subdirectories are treated as keyspaces.
    data_dir: PathBuf,

    /// Caller-supplied version; when present it is returned as-is by version
    /// resolution instead of consulting the data directory.
    version_hint: Option<String>,
}
74
75impl Scanner {
76 pub fn new(data_dir: &Path, version_hint: Option<String>) -> Self {
78 Self {
79 data_dir: data_dir.to_path_buf(),
80 version_hint,
81 }
82 }
83
84 pub fn scan(&self) -> Result<ScanResult> {
94 let mut keyspaces = Vec::new();
95 let mut tables = Vec::new();
96 let mut sstable_count = 0;
97 let mut keyspace_info = Vec::new();
98
99 let entries = std::fs::read_dir(&self.data_dir).map_err(|e| {
101 Error::Io(std::io::Error::new(
102 e.kind(),
103 format!(
104 "Failed to read data directory {}: {}",
105 self.data_dir.display(),
106 e
107 ),
108 ))
109 })?;
110
111 for entry in entries.flatten() {
112 if !entry.path().is_dir() {
113 continue;
114 }
115
116 let keyspace_name = entry.file_name().to_string_lossy().to_string();
117
118 if keyspace_name.starts_with("system") {
120 continue;
121 }
122
123 keyspaces.push(keyspace_name.clone());
124
125 let mut keyspace_tables = Vec::new();
127 if let Ok(table_entries) = std::fs::read_dir(entry.path()) {
128 for table_entry in table_entries.flatten() {
129 if !table_entry.path().is_dir() {
130 continue;
131 }
132
133 let table_dir_name = table_entry.file_name().to_string_lossy().to_string();
134
135 let table_name = table_dir_name
137 .split('-')
138 .next()
139 .unwrap_or(&table_dir_name)
140 .to_string();
141
142 let qualified_name = format!("{}.{}", keyspace_name, table_name);
143
144 let mut table_sstable_count = 0;
146 if let Ok(sstable_files) = std::fs::read_dir(table_entry.path()) {
147 for sstable_file in sstable_files.flatten() {
148 let file_name = sstable_file.file_name().to_string_lossy().to_string();
149 if file_name.ends_with("-Data.db") || file_name == "Data.db" {
151 table_sstable_count += 1;
152 sstable_count += 1;
153 }
154 }
155 }
156
157 tables.push(qualified_name.clone());
158 keyspace_tables.push(TableInfo {
159 qualified_name,
160 keyspace: keyspace_name.clone(),
161 name: table_name,
162 sstable_count: table_sstable_count,
163 path: table_entry.path(),
164 });
165 }
166 }
167
168 if !keyspace_tables.is_empty() {
169 keyspace_info.push(KeyspaceInfo {
170 name: keyspace_name,
171 tables: keyspace_tables,
172 });
173 }
174 }
175
176 let mut warnings = Vec::new();
178 if !tables.is_empty() {
179 let valid_table_dir_count = keyspace_info
180 .iter()
181 .flat_map(|k| &k.tables)
182 .filter(|t| {
183 t.path
184 .file_name()
185 .map(|n| has_cassandra_table_uuid_suffix(&n.to_string_lossy()))
186 .unwrap_or(false)
187 })
188 .count();
189
190 if valid_table_dir_count == 0 {
191 warnings.push(format!(
192 "Warning: No table directories with expected 'name-uuid' format found.\n\
193 The --data-dir may be pointing to the wrong directory level.\n\
194 Current path: {}\n\
195 Expected structure: <data-dir>/<keyspace>/<table>-<uuid>/\n\
196 Hint: Try using a subdirectory like: {}/sstables or {}/data",
197 self.data_dir.display(),
198 self.data_dir.display(),
199 self.data_dir.display()
200 ));
201 }
202 }
203
204 Ok(ScanResult {
205 keyspaces,
206 tables,
207 sstable_count,
208 keyspace_info,
209 warnings,
210 })
211 }
212
213 pub fn resolve_version(&self, _scan_result: &ScanResult) -> Result<Option<String>> {
219 if let Some(hint) = &self.version_hint {
221 return Ok(Some(hint.clone()));
222 }
223
224 let metadata_path = self.data_dir.join("metadata.yml");
230 if metadata_path.exists() {
231 if let Ok(content) = std::fs::read_to_string(&metadata_path) {
232 for line in content.lines() {
234 if line.trim().starts_with("version:") {
235 let version = line
236 .trim()
237 .strip_prefix("version:")
238 .unwrap_or("")
239 .trim()
240 .trim_matches('"')
241 .trim_matches('\'')
242 .to_string();
243 if !version.is_empty() {
244 return Ok(Some(version));
245 }
246 }
247 }
248 }
249 }
250
251 Ok(Some("unknown".to_string()))
253 }
254}
255
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates `<root>/<keyspace>/<table_dir>/` and writes `payload` into one
    /// file per name listed in `sstables`.
    fn make_table(
        root: &Path,
        keyspace: &str,
        table_dir: &str,
        sstables: &[&str],
        payload: &[u8],
    ) {
        let dir = root.join(keyspace).join(table_dir);
        fs::create_dir_all(&dir).unwrap();
        for file_name in sstables {
            fs::write(dir.join(file_name), payload).unwrap();
        }
    }

    /// Runs a hint-less scan over `root`, panicking if the scan fails.
    fn scan_dir(root: &Path) -> ScanResult {
        Scanner::new(root, None).scan().unwrap()
    }

    #[test]
    fn test_scanner_empty_directory() {
        let root = TempDir::new().unwrap();
        let scan = scan_dir(root.path());

        assert_eq!(scan.sstable_count, 0);
        assert!(scan.keyspaces.is_empty());
        assert!(scan.tables.is_empty());
        assert!(scan.keyspace_info.is_empty());
    }

    #[test]
    fn test_scanner_with_structure() {
        let root = TempDir::new().unwrap();
        make_table(
            root.path(),
            "test_ks",
            "users-6aa08200a25111f0a3fef1a551383fb9",
            &["na-1-big-Data.db", "na-2-big-Data.db"],
            b"mock data",
        );

        let scan = scan_dir(root.path());

        assert_eq!(scan.sstable_count, 2);
        assert_eq!(scan.keyspaces.len(), 1);
        assert!(scan.keyspaces.contains(&"test_ks".to_string()));
        assert_eq!(scan.tables.len(), 1);
        assert!(scan.tables.iter().any(|t| t.starts_with("test_ks.users")));

        assert_eq!(scan.keyspace_info.len(), 1);
        let keyspace = &scan.keyspace_info[0];
        assert_eq!(keyspace.name, "test_ks");
        assert_eq!(keyspace.tables.len(), 1);
        assert_eq!(keyspace.tables[0].sstable_count, 2);

        // A properly suffixed table directory must not trigger the layout warning.
        assert!(scan.warnings.is_empty());
    }

    #[test]
    fn test_scanner_skips_system_keyspaces() {
        let root = TempDir::new().unwrap();

        // A system keyspace that must be ignored entirely.
        make_table(
            root.path(),
            "system",
            "local-6aa08200a25111f0a3fef1a551383fb9",
            &["Data.db"],
            b"mock",
        );
        // A user keyspace that must be picked up.
        make_table(
            root.path(),
            "user_ks",
            "table-7bb09311b36222f1b4fef2b662494fc0",
            &["na-1-big-Data.db"],
            b"mock",
        );

        let scan = scan_dir(root.path());

        assert_eq!(scan.keyspaces.len(), 1);
        assert!(scan.keyspaces.contains(&"user_ks".to_string()));
        assert!(!scan.keyspaces.iter().any(|k| k.starts_with("system")));
        assert_eq!(scan.sstable_count, 1);
        assert!(scan.warnings.is_empty());
    }

    #[test]
    fn test_resolve_version_with_hint() {
        let root = TempDir::new().unwrap();
        let scanner = Scanner::new(root.path(), Some("5.0".to_string()));
        let scan = scanner.scan().unwrap();

        assert_eq!(
            scanner.resolve_version(&scan).unwrap(),
            Some("5.0".to_string())
        );
    }

    #[test]
    fn test_resolve_version_from_metadata_yml() {
        let root = TempDir::new().unwrap();
        fs::write(
            root.path().join("metadata.yml"),
            "version: 5.0.1\nother: field\n",
        )
        .unwrap();

        let scanner = Scanner::new(root.path(), None);
        let scan = scanner.scan().unwrap();

        assert_eq!(
            scanner.resolve_version(&scan).unwrap(),
            Some("5.0.1".to_string())
        );
    }

    #[test]
    fn test_resolve_version_unknown() {
        let root = TempDir::new().unwrap();
        let scanner = Scanner::new(root.path(), None);
        let scan = scanner.scan().unwrap();

        assert_eq!(
            scanner.resolve_version(&scan).unwrap(),
            Some("unknown".to_string())
        );
    }

    #[test]
    fn test_scanner_multiple_keyspaces() {
        let root = TempDir::new().unwrap();

        let layout = [
            ("keyspace1", "6aa08200a25111f0a3fef1a551383fb9"),
            ("keyspace2", "7bb09311b36222f1b4fef2b662494fc0"),
            ("keyspace3", "8cc0a422c47333f2c5fef3c773505fd1"),
        ];

        for (ks_name, uuid) in layout.iter() {
            make_table(
                root.path(),
                ks_name,
                &format!("{}_table-{}", ks_name, uuid),
                &["na-1-big-Data.db"],
                b"mock",
            );
        }

        let scan = scan_dir(root.path());

        assert_eq!(scan.keyspaces.len(), 3);
        assert_eq!(scan.tables.len(), 3);
        assert_eq!(scan.sstable_count, 3);
        assert!(scan.warnings.is_empty());
    }

    #[test]
    fn test_scanner_warns_on_invalid_table_directory_format() {
        let root = TempDir::new().unwrap();

        // Simulate pointing the scanner one level too high: the real keyspaces
        // live under `sstables/`, so they get mistaken for tables of a
        // keyspace called "sstables" and none of them has a UUID suffix.
        for ks_name in ["test_basic", "test_collections"].iter() {
            make_table(
                root.path(),
                "sstables",
                ks_name,
                &["na-1-big-Data.db"],
                b"mock",
            );
        }

        let scan = scan_dir(root.path());

        assert!(!scan.tables.is_empty());
        assert!(!scan.warnings.is_empty());
        let warning = &scan.warnings[0];
        assert!(warning.contains("name-uuid"));
        assert!(warning.contains("wrong directory level"));
    }

    #[test]
    fn test_scanner_invalid_directory() {
        let outcome = Scanner::new(Path::new("/nonexistent/path"), None).scan();

        assert!(outcome.is_err());
        match outcome {
            Err(Error::Io(io_err)) => {
                assert_eq!(io_err.kind(), std::io::ErrorKind::NotFound)
            }
            _ => panic!("Expected Io error"),
        }
    }

    #[test]
    fn test_has_cassandra_table_uuid_suffix() {
        let accepted = [
            "simple_table-6aa08200a25111f0a3fef1a551383fb9",
            "users-0123456789abcdef0123456789abcdef",
            "my_table-ABCDEF0123456789ABCDEF0123456789",
        ];
        for dir_name in accepted.iter() {
            assert!(has_cassandra_table_uuid_suffix(dir_name));
        }

        let rejected = [
            // No dash at all.
            "test_basic",
            "users",
            // Suffix too short.
            "users-abc123",
            "table-456",
            // Suffix too long.
            "table-6aa08200a25111f0a3fef1a551383fb9extra",
            // Right length, but not hexadecimal.
            "table-6aa08200a25111f0a3fef1a551383fgz",
        ];
        for dir_name in rejected.iter() {
            assert!(!has_cassandra_table_uuid_suffix(dir_name));
        }
    }
}