1use std::path::{Path, PathBuf};
4
5use clap::Subcommand;
6
7use super::basic::load_dataset;
8use crate::{
9 backend::LocalBackend,
10 registry::{DatasetMetadata, Registry},
11 Dataset,
12};
13
14#[derive(Subcommand)]
16pub enum RegistryCommands {
17 Init {
19 #[arg(short, long, default_value = ".alimentar")]
21 path: PathBuf,
22 },
23 List {
25 #[arg(short, long, default_value = ".alimentar")]
27 path: PathBuf,
28 },
29 Push {
31 input: PathBuf,
33 #[arg(short, long)]
35 name: String,
36 #[arg(short, long, default_value = "1.0.0")]
38 version: String,
39 #[arg(short, long, default_value = "")]
41 description: String,
42 #[arg(short, long, default_value = "")]
44 license: String,
45 #[arg(short, long, default_value = "")]
47 tags: String,
48 #[arg(long, default_value = ".alimentar")]
50 registry: PathBuf,
51 },
52 Pull {
54 name: String,
56 #[arg(short, long)]
58 output: PathBuf,
59 #[arg(short, long)]
61 version: Option<String>,
62 #[arg(long, default_value = ".alimentar")]
64 registry: PathBuf,
65 },
66 Search {
68 query: String,
70 #[arg(short, long, default_value = ".alimentar")]
72 path: PathBuf,
73 },
74 ShowInfo {
76 name: String,
78 #[arg(short, long, default_value = ".alimentar")]
80 path: PathBuf,
81 },
82 Delete {
84 name: String,
86 #[arg(short, long)]
88 version: String,
89 #[arg(short, long, default_value = ".alimentar")]
91 path: PathBuf,
92 },
93}
94
95pub(crate) fn create_registry(path: &Path) -> crate::Result<Registry> {
97 if !path.exists() {
99 std::fs::create_dir_all(path).map_err(|e| crate::Error::io(e, path))?;
100 }
101 let backend = LocalBackend::new(path)?;
102 Ok(Registry::new(Box::new(backend)))
103}
104
105pub(crate) fn cmd_registry_init(path: &Path) -> crate::Result<()> {
107 let registry = create_registry(path)?;
108 registry.init()?;
109 println!("Initialized registry at: {}", path.display());
110 Ok(())
111}
112
113pub(crate) fn cmd_registry_list(path: &Path) -> crate::Result<()> {
115 let registry = create_registry(path)?;
116 let datasets = registry.list()?;
117
118 if datasets.is_empty() {
119 println!("No datasets in registry.");
120 return Ok(());
121 }
122
123 println!("Datasets in registry:\n");
124 println!(
125 "{:<25} {:<12} {:<10} {:<15} DESCRIPTION",
126 "NAME", "LATEST", "VERSIONS", "ROWS"
127 );
128 println!("{}", "-".repeat(80));
129
130 for ds in datasets {
131 let desc = if ds.metadata.description.len() > 30 {
132 format!("{}...", &ds.metadata.description[..27])
133 } else {
134 ds.metadata.description.clone()
135 };
136 println!(
137 "{:<25} {:<12} {:<10} {:<15} {}",
138 ds.name,
139 ds.latest,
140 ds.versions.len(),
141 ds.num_rows,
142 desc
143 );
144 }
145
146 Ok(())
147}
148
149#[allow(clippy::too_many_arguments)]
151pub(crate) fn cmd_registry_push(
152 input: &Path,
153 name: &str,
154 version: &str,
155 description: &str,
156 license: &str,
157 tags: &str,
158 registry_path: &Path,
159) -> crate::Result<()> {
160 let registry = create_registry(registry_path)?;
161
162 registry.init()?;
164
165 let dataset = load_dataset(input)?;
167
168 let tag_list: Vec<String> = if tags.is_empty() {
170 Vec::new()
171 } else {
172 tags.split(',').map(|s| s.trim().to_string()).collect()
173 };
174
175 let metadata = DatasetMetadata {
177 description: description.to_string(),
178 license: license.to_string(),
179 tags: tag_list,
180 source: Some(input.display().to_string()),
181 citation: None,
182 sha256: None, };
184
185 registry.publish(name, version, &dataset, metadata)?;
187
188 println!(
189 "Published {}@{} ({} rows) to registry",
190 name,
191 version,
192 dataset.len()
193 );
194
195 Ok(())
196}
197
198pub(crate) fn cmd_registry_pull(
200 name: &str,
201 output: &Path,
202 version: Option<&str>,
203 registry_path: &Path,
204) -> crate::Result<()> {
205 let registry = create_registry(registry_path)?;
206
207 let dataset = registry.pull(name, version)?;
209
210 dataset.to_parquet(output)?;
212
213 let ver = version.unwrap_or("latest");
214 println!(
215 "Pulled {}@{} ({} rows) to {}",
216 name,
217 ver,
218 dataset.len(),
219 output.display()
220 );
221
222 Ok(())
223}
224
225pub(crate) fn cmd_registry_search(query: &str, path: &Path) -> crate::Result<()> {
227 let registry = create_registry(path)?;
228 let results = registry.search(query)?;
229
230 if results.is_empty() {
231 println!("No datasets found matching '{}'", query);
232 return Ok(());
233 }
234
235 println!("Search results for '{}':\n", query);
236 println!("{:<25} {:<12} {:<10} DESCRIPTION", "NAME", "LATEST", "ROWS");
237 println!("{}", "-".repeat(70));
238
239 for ds in results {
240 let desc = if ds.metadata.description.len() > 30 {
241 format!("{}...", &ds.metadata.description[..27])
242 } else {
243 ds.metadata.description.clone()
244 };
245 println!(
246 "{:<25} {:<12} {:<10} {}",
247 ds.name, ds.latest, ds.num_rows, desc
248 );
249 }
250
251 Ok(())
252}
253
254pub(crate) fn cmd_registry_show_info(name: &str, path: &Path) -> crate::Result<()> {
256 let registry = create_registry(path)?;
257 let info = registry.get_info(name)?;
258
259 println!("Dataset: {}", info.name);
260 println!("Latest: {}", info.latest);
261 println!("Versions: {}", info.versions.join(", "));
262 println!("Rows: {}", info.num_rows);
263 println!("Size: {} bytes", info.size_bytes);
264 println!();
265 println!("Description: {}", info.metadata.description);
266 println!("License: {}", info.metadata.license);
267 println!("Tags: {}", info.metadata.tags.join(", "));
268
269 if let Some(source) = &info.metadata.source {
270 println!("Source: {}", source);
271 }
272 if let Some(citation) = &info.metadata.citation {
273 println!("Citation: {}", citation);
274 }
275
276 println!();
277 println!("Schema:");
278 if let Some(fields) = info.schema.get("fields").and_then(|f| f.as_array()) {
279 for field in fields {
280 let name = field.get("name").and_then(|n| n.as_str()).unwrap_or("?");
281 let dtype = field
282 .get("data_type")
283 .and_then(|d| d.as_str())
284 .unwrap_or("?");
285 let nullable = field
286 .get("nullable")
287 .and_then(serde_json::Value::as_bool)
288 .unwrap_or(true);
289 let null_str = if nullable { "nullable" } else { "not null" };
290 println!(" - {} ({}) [{}]", name, dtype, null_str);
291 }
292 }
293
294 Ok(())
295}
296
297pub(crate) fn cmd_registry_delete(name: &str, version: &str, path: &Path) -> crate::Result<()> {
299 let registry = create_registry(path)?;
300 registry.delete(name, version)?;
301 println!("Deleted {}@{} from registry", name, version);
302 Ok(())
303}
304
#[cfg(test)]
// Test code favours brevity over the stricter lints applied to library code.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_precision_loss,
    clippy::uninlined_format_args,
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::redundant_clone,
    clippy::cast_lossless,
    clippy::redundant_closure_for_method_calls,
    clippy::too_many_lines,
    clippy::float_cmp,
    clippy::similar_names,
    clippy::needless_late_init,
    clippy::redundant_pattern_matching
)]
mod tests {
    use std::sync::Arc;

    use arrow::{
        array::{Int32Array, StringArray},
        datatypes::{DataType, Field, Schema},
    };

    use super::*;
    use crate::ArrowDataset;

    /// Creates the temporary directory a single test works in.
    fn scratch_dir() -> tempfile::TempDir {
        tempfile::tempdir().expect("Should create temp dir")
    }

    /// Writes a parquet file at `path` with `rows` rows and two non-nullable
    /// columns: `id` (0, 1, 2, ...) and `name` (`item_<id>`).
    fn create_test_parquet(path: &Path, rows: usize) {
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, false),
        ]));

        let row_count = i32::try_from(rows).expect("Row count should fit in i32");
        let ids: Vec<i32> = (0..row_count).collect();
        let names: Vec<String> = ids.iter().map(|i| format!("item_{}", i)).collect();

        let batch = arrow::array::RecordBatch::try_new(
            schema,
            vec![
                Arc::new(Int32Array::from(ids)),
                Arc::new(StringArray::from(names)),
            ],
        )
        .expect("Should create batch");

        let dataset = ArrowDataset::from_batch(batch).expect("Should create dataset");

        dataset.to_parquet(path).expect("Should write parquet");
    }

    #[test]
    fn test_cmd_registry_init() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");

        let result = cmd_registry_init(&registry_path);
        assert!(result.is_ok());
        assert!(registry_path.exists());
    }

    #[test]
    fn test_cmd_registry_list_empty() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");

        cmd_registry_init(&registry_path).expect("Should init");

        let result = cmd_registry_list(&registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_push_and_pull() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input = temp_dir.path().join("data.parquet");
        let output = temp_dir.path().join("pulled.parquet");

        create_test_parquet(&input, 25);

        let result = cmd_registry_push(
            &input,
            "test-dataset",
            "1.0.0",
            "A test dataset",
            "MIT",
            "test,example",
            &registry_path,
        );
        assert!(result.is_ok());

        let result = cmd_registry_list(&registry_path);
        assert!(result.is_ok());

        let result = cmd_registry_pull("test-dataset", &output, Some("1.0.0"), &registry_path);
        assert!(result.is_ok());
        assert!(output.exists());

        // The round trip must preserve the number of rows.
        let original = ArrowDataset::from_parquet(&input).expect("Should load original");
        let pulled = ArrowDataset::from_parquet(&output).expect("Should load pulled");
        assert_eq!(original.len(), pulled.len());
    }

    #[test]
    fn test_cmd_registry_search() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input = temp_dir.path().join("data.parquet");

        create_test_parquet(&input, 10);

        cmd_registry_push(
            &input,
            "ml-dataset",
            "1.0.0",
            "Machine learning training data",
            "Apache-2.0",
            "ml,training",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_search("ml", &registry_path);
        assert!(result.is_ok());

        let result = cmd_registry_search("machine", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_show_info() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input = temp_dir.path().join("data.parquet");

        create_test_parquet(&input, 10);

        cmd_registry_push(
            &input,
            "info-test",
            "1.0.0",
            "Test description",
            "MIT",
            "test",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_show_info("info-test", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_delete() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input = temp_dir.path().join("data.parquet");

        create_test_parquet(&input, 10);

        cmd_registry_push(
            &input,
            "delete-test",
            "1.0.0",
            "Will be deleted",
            "",
            "",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_delete("delete-test", "1.0.0", &registry_path);
        assert!(result.is_ok());

        // Once its only version is gone the dataset can no longer be looked up.
        let result = cmd_registry_show_info("delete-test", &registry_path);
        assert!(result.is_err());
    }

    #[test]
    fn test_cmd_registry_pull_latest() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input1 = temp_dir.path().join("v1.parquet");
        let input2 = temp_dir.path().join("v2.parquet");
        let output = temp_dir.path().join("pulled.parquet");

        create_test_parquet(&input1, 10);
        create_test_parquet(&input2, 20);

        cmd_registry_push(&input1, "versioned", "1.0.0", "V1", "", "", &registry_path)
            .expect("Should push v1");

        cmd_registry_push(&input2, "versioned", "2.0.0", "V2", "", "", &registry_path)
            .expect("Should push v2");

        let result = cmd_registry_pull("versioned", &output, None, &registry_path);
        assert!(result.is_ok());

        // Pulling without a version must yield the 20-row 2.0.0 dataset.
        let pulled = ArrowDataset::from_parquet(&output).expect("Should load");
        assert_eq!(pulled.len(), 20);
    }

    #[test]
    fn test_cmd_registry_search_no_results() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");

        cmd_registry_init(&registry_path).expect("Should init");

        let result = cmd_registry_search("nonexistent-dataset-xyz", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_push_with_long_description() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input = temp_dir.path().join("data.parquet");

        create_test_parquet(&input, 10);

        let long_desc = "This is a very long description that exceeds thirty characters and will be truncated in the list view";
        let result = cmd_registry_push(
            &input,
            "long-desc-test",
            "1.0.0",
            long_desc,
            "MIT",
            "",
            &registry_path,
        );
        assert!(result.is_ok());

        // Listing exercises the description-truncation path.
        let result = cmd_registry_list(&registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_show_info_with_all_metadata() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let input = temp_dir.path().join("data.parquet");

        create_test_parquet(&input, 10);

        cmd_registry_push(
            &input,
            "full-metadata",
            "1.0.0",
            "Full metadata test",
            "Apache-2.0",
            "test,metadata,full",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_show_info("full-metadata", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_create_registry_new_directory() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("new_registry_dir");

        assert!(!registry_path.exists());

        let result = create_registry(&registry_path);
        assert!(result.is_ok());

        assert!(registry_path.exists());
    }

    #[test]
    fn test_cmd_registry_delete_nonexistent() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");

        cmd_registry_init(&registry_path).expect("Should init");

        let result = cmd_registry_delete("nonexistent", "1.0.0", &registry_path);
        assert!(result.is_err());
    }

    #[test]
    fn test_cmd_registry_pull_nonexistent() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let output = temp_dir.path().join("output.parquet");

        cmd_registry_init(&registry_path).expect("Should init");

        let result = cmd_registry_pull("nonexistent", &output, None, &registry_path);
        assert!(result.is_err());
    }

    #[test]
    fn test_cmd_registry_search_with_data() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let data_path = temp_dir.path().join("data.parquet");

        create_test_parquet(&data_path, 20);

        cmd_registry_push(
            &data_path,
            "searchable-data",
            "1.0.0",
            "Dataset for search test",
            "MIT",
            "search,test",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_search("search", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_search_empty_results() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");

        cmd_registry_init(&registry_path).expect("Should init");

        let result = cmd_registry_search("nonexistent", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_show_info_basic() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let data_path = temp_dir.path().join("data.parquet");

        create_test_parquet(&data_path, 20);

        cmd_registry_push(
            &data_path,
            "info-dataset",
            "1.0.0",
            "Dataset for info test",
            "Apache-2.0",
            "info,test",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_show_info("info-dataset", &registry_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cmd_registry_show_info_not_found() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");

        cmd_registry_init(&registry_path).expect("Should init");

        let result = cmd_registry_show_info("nonexistent", &registry_path);
        assert!(result.is_err());
    }

    #[test]
    fn test_cmd_registry_delete_existing() {
        let temp_dir = scratch_dir();
        let registry_path = temp_dir.path().join("registry");
        let data_path = temp_dir.path().join("data.parquet");

        create_test_parquet(&data_path, 20);

        cmd_registry_push(
            &data_path,
            "delete-test",
            "1.0.0",
            "Dataset to delete",
            "MIT",
            "delete,test",
            &registry_path,
        )
        .expect("Should push");

        let result = cmd_registry_delete("delete-test", "1.0.0", &registry_path);
        assert!(result.is_ok());
    }
}