1use std::collections::{HashMap, HashSet};
7use std::io::Write;
8use std::path::{Path, PathBuf};
9use std::time::{SystemTime, UNIX_EPOCH};
10use std::{env as StdEnv, fs as StdFs};
11
12use cargo_metadata::{DependencyKind, Metadata, MetadataCommand, PackageId};
13use serde_json as SJSON;
14
15use crate::error::Error;
16
/// Metadata recorded for one dependency whose sources were copied.
///
/// One entry per collected crate is stored in [`SourceManifest::crates`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CollectedCrate {
    /// Package name as reported by `cargo metadata`.
    pub name: String,

    /// Package version, rendered as a string.
    pub version: String,

    /// Rust edition declared by the package, rendered as a string.
    pub edition: String,

    /// Names of the features the package declares.
    pub features: Vec<String>,

    /// Package description, when the package provides one.
    pub description: Option<String>,

    /// Directory name (`{name}-{version}`) of the copied sources inside the output directory.
    pub source_path: String,
}
38
/// Index of a collection run, serialized as `manifest.json` in the output directory.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SourceManifest {
    /// Time the collection started, formatted as `YYYY-MM-DDTHH:MM:SSZ`.
    pub collected_at: String,

    /// Workspace root reported by `cargo metadata`.
    pub workspace_root: String,

    /// Collected crates keyed by `{name}-{version}`.
    pub crates: HashMap<String, CollectedCrate>,
}
51
/// Summary returned by [`SourceCollector::collect`].
#[derive(Debug)]
pub struct CollectionResult {
    /// Directory the sources were (or, for a dry run, would be) written to.
    pub output_dir: PathBuf,

    /// Number of crates whose registry sources were found.
    pub crates_collected: usize,

    /// `{name}-{version}` of each package whose sources were not found in the registry.
    pub skipped: Vec<String>,
}
64
/// Switches controlling [`SourceCollector::collect`]. All default to `false` / `None`.
#[derive(Debug, Default)]
pub struct CollectOptions {
    /// When `false`, packages reachable only through dev-dependencies are left out.
    pub include_dev: bool,

    /// Explicit output directory; when `None` a `.source_<timestamp>` directory
    /// under the workspace root is chosen.
    pub output: Option<PathBuf>,

    /// When `true`, only report what would be collected; nothing is copied.
    pub dry_run: bool,

    /// When `true`, copy only each crate's `src/` directory and its `Cargo.toml`
    /// instead of the whole crate directory.
    pub minimal_sources: bool,

    /// When `true`, skip adding `.source_*` to the workspace `.gitignore`.
    pub no_gitignore: bool,
}
87
/// Copies the registry sources of a workspace's dependencies into a local directory.
#[derive(Debug)]
pub struct SourceCollector {
    /// Output of `cargo metadata` for the project being collected.
    metadata: Metadata,

    /// Directory that holds one sub-directory per registry index, each of which
    /// contains unpacked `{name}-{version}` crate sources.
    registry_path: PathBuf,
}
97
98impl SourceCollector {
    /// Creates a collector without naming a manifest explicitly.
    ///
    /// Equivalent to `from_manifest(None)`: no manifest path is handed to
    /// `cargo metadata`, so Cargo picks its default manifest.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Self::from_manifest`].
    pub fn new() -> Result<Self, Error> {
        Self::from_manifest(None)
    }
107
108 pub fn from_manifest(manifest_path: Option<&Path>) -> Result<Self, Error> {
114 let mut cmd = MetadataCommand::new();
115
116 if let Some(path) = manifest_path {
117 cmd.manifest_path(path);
118 }
119
120 let metadata = cmd
121 .exec()
122 .map_err(|e| Error::SourceCollector(format!("Failed to load cargo metadata: {e}")))?;
123
124 let home = StdEnv::var("HOME")
125 .or_else(|_| StdEnv::var("USERPROFILE"))
126 .map_err(|_| Error::SourceCollector("Could not determine home directory".into()))?;
127
128 let registry_path = PathBuf::from(home).join(".cargo/registry/src");
129
130 Ok(Self {
131 metadata,
132 registry_path,
133 })
134 }
135
136 pub fn collect(&self, options: &CollectOptions) -> Result<CollectionResult, Error> {
142 let output_dir = match &options.output {
144 Some(path) => path.clone(),
145 None => self.generate_output_dir()?,
146 };
147
148 if options.dry_run {
149 return self.dry_run_collect(&output_dir, options);
150 }
151
152 StdFs::create_dir_all(&output_dir)
154 .map_err(|e| Error::SourceCollector(format!("Failed to create output dir: {e}")))?;
155
156 let mut manifest = SourceManifest {
157 collected_at: TimeUtils::chrono_lite_now(),
158 workspace_root: self.metadata.workspace_root.to_string(),
159 crates: HashMap::new(),
160 };
161
162 let mut skipped = Vec::new();
163 let mut collected_count = 0;
164
165 let dev_only = if options.include_dev {
167 HashSet::new()
168 } else {
169 self.get_dev_only_packages()
170 };
171
172 for pkg in &self.metadata.packages {
174 if self.metadata.workspace_members.contains(&pkg.id) {
176 continue;
177 }
178
179 if dev_only.contains(&pkg.id) {
181 continue;
182 }
183
184 let version = pkg.version.to_string();
185 let key = format!("{}-{}", pkg.name, version);
186
187 match self.find_registry_source(&pkg.name, &version) {
189 Some(source_path) => {
190 let dest_dir = output_dir.join(&key);
191
192 Self::copy_crate_source(&source_path, &dest_dir, options.minimal_sources)?;
194
195 manifest.crates.insert(
197 key.clone(),
198 CollectedCrate {
199 name: pkg.name.to_string(),
200 version: version.clone(),
201 edition: pkg.edition.to_string(),
202 features: pkg.features.keys().cloned().collect(),
203 description: pkg.description.clone(),
204 source_path: key,
205 },
206 );
207
208 collected_count += 1;
209 },
210 None => {
211 skipped.push(format!("{}-{}", pkg.name, version));
212 },
213 }
214 }
215
216 let manifest_path = output_dir.join("manifest.json");
218 let manifest_json = SJSON::to_string_pretty(&manifest)
219 .map_err(|e| Error::SourceCollector(format!("Failed to serialize manifest: {e}")))?;
220 StdFs::write(&manifest_path, manifest_json)
221 .map_err(|e| Error::SourceCollector(format!("Failed to write manifest: {e}")))?;
222
223 if !options.no_gitignore {
225 self.update_gitignore()?;
226 }
227
228 Ok(CollectionResult {
229 output_dir,
230 crates_collected: collected_count,
231 skipped,
232 })
233 }
234
235 fn generate_output_dir(&self) -> Result<PathBuf, Error> {
237 let workspace_root = self.metadata.workspace_root.as_std_path();
238 let timestamp = SystemTime::now()
239 .duration_since(UNIX_EPOCH)
240 .map_err(|e| Error::SourceCollector(format!("Failed to get timestamp: {e}")))?
241 .as_secs();
242
243 for i in 0..3 {
245 let dir_name = format!(".source_{}", timestamp + i);
246 let path = workspace_root.join(&dir_name);
247
248 if !path.exists() {
249 return Ok(path);
250 }
251 }
252
253 Err(Error::SourceCollector(
254 "Too many .source_* directories exist. Please clean up old ones.".into(),
255 ))
256 }
257
258 fn find_registry_source(&self, name: &str, version: &str) -> Option<PathBuf> {
260 if !self.registry_path.exists() {
261 return None;
262 }
263
264 let target_dir = format!("{name}-{version}");
265
266 for entry in StdFs::read_dir(&self.registry_path).ok()? {
268 let entry = entry.ok()?;
269 let index_path = entry.path();
270
271 if index_path.is_dir() {
272 let crate_path = index_path.join(&target_dir);
273
274 if crate_path.exists() && crate_path.is_dir() {
275 return Some(crate_path);
276 }
277 }
278 }
279
280 None
281 }
282
283 fn copy_crate_source(source: &Path, dest: &Path, minimal: bool) -> Result<(), Error> {
291 StdFs::create_dir_all(dest)
292 .map_err(|e| Error::SourceCollector(format!("Failed to create dir: {e}")))?;
293
294 if minimal {
295 let src_dir = source.join("src");
297
298 if src_dir.exists() {
299 Self::copy_dir_recursive(&src_dir, &dest.join("src"))?;
300 }
301
302 let cargo_toml = source.join("Cargo.toml");
304 if cargo_toml.exists() {
305 StdFs::copy(&cargo_toml, dest.join("Crate.toml"))
306 .map_err(|e| Error::SourceCollector(format!("Failed to copy Cargo.toml: {e}")))?;
307 }
308 } else {
309 for entry in StdFs::read_dir(source).map_err(|e| {
311 Error::SourceCollector(format!("Failed to read source dir: {e}"))
312 })? {
313 let entry = entry.map_err(|e| {
314 Error::SourceCollector(format!("Failed to read entry: {e}"))
315 })?;
316 let path = entry.path();
317 let file_name = entry.file_name();
318 let file_name_str = file_name.to_string_lossy();
319
320 let dest_name = if file_name_str == "Cargo.toml" {
322 "Crate.toml".into()
323 } else {
324 file_name
325 };
326 let dest_path = dest.join(dest_name);
327
328 if path.is_dir() {
329 Self::copy_dir_recursive(&path, &dest_path)?;
330 } else {
331 StdFs::copy(&path, &dest_path).map_err(|e| {
332 Error::SourceCollector(format!(
333 "Failed to copy {} to {}: {e}",
334 path.display(),
335 dest_path.display()
336 ))
337 })?;
338 }
339 }
340 }
341
342 Ok(())
343 }
344
345 fn get_dev_only_packages(&self) -> HashSet<PackageId> {
350 let Some(resolve) = &self.metadata.resolve else {
351 return HashSet::new();
352 };
353
354 let nodes: HashMap<&PackageId, _> =
356 resolve.nodes.iter().map(|node| (&node.id, node)).collect();
357
358 let mut non_dev_reachable: HashSet<PackageId> = HashSet::new();
360 let mut to_visit: Vec<&PackageId> = self.metadata.workspace_members.iter().collect();
361
362 while let Some(pkg_id) = to_visit.pop() {
363 if let Some(node) = nodes.get(pkg_id) {
364 for dep in &node.deps {
365 let has_non_dev = dep
367 .dep_kinds
368 .iter()
369 .any(|dk| !matches!(dk.kind, DependencyKind::Development));
370
371 if has_non_dev && non_dev_reachable.insert(dep.pkg.clone()) {
372 to_visit.push(&dep.pkg);
373 }
374 }
375 }
376 }
377
378 self.metadata
381 .packages
382 .iter()
383 .filter(|pkg| {
384 !self.metadata.workspace_members.contains(&pkg.id)
385 && !non_dev_reachable.contains(&pkg.id)
386 })
387 .map(|pkg| pkg.id.clone())
388 .collect()
389 }
390
391 #[expect(clippy::unnecessary_wraps, reason = "Not really")]
393 fn dry_run_collect(
394 &self,
395 output_dir: &Path,
396 options: &CollectOptions,
397 ) -> Result<CollectionResult, Error> {
398 let mut skipped = Vec::new();
399 let mut collected_count = 0;
400
401 let dev_only = if options.include_dev {
403 HashSet::new()
404 } else {
405 self.get_dev_only_packages()
406 };
407
408 for pkg in &self.metadata.packages {
409 if self.metadata.workspace_members.contains(&pkg.id) {
410 continue;
411 }
412
413 if dev_only.contains(&pkg.id) {
415 continue;
416 }
417
418 let version = pkg.version.to_string();
419
420 if self.find_registry_source(&pkg.name, &version).is_some() {
421 collected_count += 1;
422 } else {
423 skipped.push(format!("{}-{}", pkg.name, version));
424 }
425 }
426
427 Ok(CollectionResult {
428 output_dir: output_dir.to_path_buf(),
429 crates_collected: collected_count,
430 skipped,
431 })
432 }
433
434 fn update_gitignore(&self) -> Result<(), Error> {
436 let gitignore_path = self.metadata.workspace_root.join(".gitignore");
437 let pattern = ".source_*";
438
439 let content = StdFs::read_to_string(&gitignore_path).unwrap_or_default();
441
442 if content.lines().any(|line| line.trim() == pattern) {
444 return Ok(());
445 }
446
447 let mut file = StdFs::OpenOptions::new()
449 .create(true)
450 .append(true)
451 .open(&gitignore_path)
452 .map_err(|e| Error::SourceCollector(format!("Failed to open .gitignore: {e}")))?;
453
454 if !content.is_empty() && !content.ends_with('\n') {
456 writeln!(file).map_err(|e| {
457 Error::SourceCollector(format!("Failed to write to .gitignore: {e}"))
458 })?;
459 }
460
461 writeln!(file, "{pattern}")
462 .map_err(|e| Error::SourceCollector(format!("Failed to write to .gitignore: {e}")))?;
463
464 Ok(())
465 }
466
467 #[must_use]
469 pub fn list_dependencies(&self) -> Vec<(&str, &str)> {
470 self.metadata
471 .packages
472 .iter()
473 .filter(|pkg| !self.metadata.workspace_members.contains(&pkg.id))
474 .map(|pkg| (pkg.name.as_str(), pkg.version.to_string().leak() as &str))
475 .collect()
476 }
477
478 fn copy_dir_recursive(src: &Path, dest: &Path) -> Result<(), Error> {
480 StdFs::create_dir_all(dest).map_err(|e| {
481 Error::SourceCollector(format!("Failed to create dir {}: {e}", dest.display()))
482 })?;
483
484 for entry in StdFs::read_dir(src).map_err(|e| {
485 Error::SourceCollector(format!("Failed to read dir {}: {e}", src.display()))
486 })? {
487 let entry =
488 entry.map_err(|e| Error::SourceCollector(format!("Failed to read entry: {e}")))?;
489 let path = entry.path();
490 let dest_path = dest.join(entry.file_name());
491
492 if path.is_dir() {
493 Self::copy_dir_recursive(&path, &dest_path)?;
494 } else {
495 StdFs::copy(&path, &dest_path).map_err(|e| {
496 Error::SourceCollector(format!(
497 "Failed to copy {} to {}: {e}",
498 path.display(),
499 dest_path.display()
500 ))
501 })?;
502 }
503 }
504
505 Ok(())
506 }
507}
508
/// Minimal UTC timestamp formatting, so no date/time crate is needed.
struct TimeUtils;

impl TimeUtils {
    /// Returns the current time as `YYYY-MM-DDTHH:MM:SSZ`.
    ///
    /// A system clock set before the Unix epoch is treated as the epoch itself.
    fn chrono_lite_now() -> String {
        let secs = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();

        Self::format_unix_secs(secs)
    }

    /// Formats `secs` seconds since the Unix epoch as `YYYY-MM-DDTHH:MM:SSZ`.
    ///
    /// Kept separate from the clock read so the calendar arithmetic can be tested
    /// with fixed inputs.
    fn format_unix_secs(secs: u64) -> String {
        const SECS_PER_DAY: u64 = 86_400;

        let time_of_day = secs % SECS_PER_DAY;
        let hours = time_of_day / 3600;
        let minutes = (time_of_day % 3600) / 60;
        let seconds = time_of_day % 60;

        // Peel off whole years, then whole months, from the day count.
        let mut remaining_days = secs / SECS_PER_DAY;
        let mut year = 1970;

        loop {
            let days_in_year = if Self::is_leap_year(year) { 366 } else { 365 };

            if remaining_days < days_in_year {
                break;
            }

            remaining_days -= days_in_year;
            year += 1;
        }

        let february = if Self::is_leap_year(year) { 29 } else { 28 };
        let days_in_months = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

        let mut month = 1;
        for days in days_in_months {
            if remaining_days < days {
                break;
            }

            remaining_days -= days;
            month += 1;
        }

        // `remaining_days` is now the zero-based day within the month.
        let day = remaining_days + 1;

        format!("{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}Z")
    }

    /// Gregorian leap-year rule: divisible by 4, except centuries not divisible by 400.
    const fn is_leap_year(year: u64) -> bool {
        (year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400)
    }
}
567
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::Path;

    use tempfile::TempDir;

    use super::{SourceCollector, TimeUtils};

    /// Names of every package the collector classifies as dev-only.
    fn dev_only_names(collector: &SourceCollector) -> Vec<&str> {
        let dev_only = collector.get_dev_only_packages();

        collector
            .metadata
            .packages
            .iter()
            .filter(|pkg| dev_only.contains(&pkg.id))
            .map(|pkg| pkg.name.as_str())
            .collect()
    }

    /// Populates `root` with a small fake crate: `src/lib.rs`, `Cargo.toml`,
    /// `build.rs`, `benches/bench.rs` and `README.md`.
    fn write_fake_crate(root: &Path) {
        fs::create_dir_all(root.join("src")).expect("Failed to create src dir");
        fs::write(root.join("src/lib.rs"), "// lib content").expect("Failed to write lib.rs");

        fs::write(root.join("Cargo.toml"), "[package]\nname = \"test\"")
            .expect("Failed to write Cargo.toml");

        fs::write(root.join("build.rs"), "fn main() {}").expect("Failed to write build.rs");
        fs::create_dir_all(root.join("benches")).expect("Failed to create benches dir");
        fs::write(root.join("benches/bench.rs"), "// bench").expect("Failed to write bench.rs");
        fs::write(root.join("README.md"), "# Test").expect("Failed to write README.md");
    }

    #[test]
    fn test_chrono_lite_now() {
        let ts = TimeUtils::chrono_lite_now();
        assert!(ts.contains('T'));
        assert!(ts.ends_with('Z'));
        assert!(ts.starts_with("20"));
    }

    #[test]
    fn test_is_leap_year() {
        assert!(TimeUtils::is_leap_year(2000));
        assert!(TimeUtils::is_leap_year(2024));
        assert!(!TimeUtils::is_leap_year(1900));
        assert!(!TimeUtils::is_leap_year(2023));
    }

    // The next tests inspect this project's own dependency graph, so the crate
    // names they mention must match the project's Cargo manifest.

    #[test]
    fn test_get_dev_only_packages_detects_dev_deps() {
        let collector = SourceCollector::new().expect("Failed to create collector");
        let dev_only_names = dev_only_names(&collector);

        assert!(
            dev_only_names.contains(&"insta"),
            "insta should be detected as dev-only, got: {dev_only_names:?}"
        );
        assert!(
            dev_only_names.contains(&"divan"),
            "divan should be detected as dev-only, got: {dev_only_names:?}"
        );
    }

    #[test]
    fn test_get_dev_only_packages_excludes_normal_deps() {
        let collector = SourceCollector::new().expect("Failed to create collector");
        let dev_only_names = dev_only_names(&collector);

        assert!(
            !dev_only_names.contains(&"serde"),
            "serde should NOT be dev-only"
        );
        assert!(
            !dev_only_names.contains(&"clap"),
            "clap should NOT be dev-only"
        );
        assert!(
            !dev_only_names.contains(&"syn"),
            "syn should NOT be dev-only"
        );
        assert!(
            !dev_only_names.contains(&"tracing"),
            "tracing should NOT be dev-only (it's also a normal dependency)"
        );
    }

    #[test]
    fn test_get_dev_only_packages_with_no_resolve() {
        let mut collector = SourceCollector::new().expect("Failed to create collector");

        collector.metadata.resolve = None;

        let dev_only = collector.get_dev_only_packages();
        assert!(
            dev_only.is_empty(),
            "Should return empty set when no resolve graph"
        );
    }

    #[test]
    fn test_list_dependencies_excludes_workspace_members() {
        let collector = SourceCollector::new().expect("Failed to create collector");
        let deps = collector.list_dependencies();

        let dep_names: Vec<&str> = deps.iter().map(|(name, _)| *name).collect();
        assert!(
            !dep_names.contains(&"cargo-docs-md"),
            "Should not include workspace member"
        );

        assert!(
            dep_names.contains(&"serde"),
            "Should include serde dependency"
        );
    }

    #[test]
    fn test_collect_options_defaults() {
        let options = super::CollectOptions::default();

        assert!(!options.include_dev, "include_dev should default to false");
        assert!(options.output.is_none(), "output should default to None");
        assert!(!options.dry_run, "dry_run should default to false");
        assert!(
            !options.minimal_sources,
            "minimal_sources should default to false (full copy)"
        );
        assert!(
            !options.no_gitignore,
            "no_gitignore should default to false (update gitignore)"
        );
    }

    #[test]
    fn test_copy_crate_source_minimal_mode() {
        let source_dir = TempDir::new().expect("Failed to create temp dir");
        let source_path = source_dir.path();
        write_fake_crate(source_path);

        let dest_dir = TempDir::new().expect("Failed to create dest temp dir");
        let dest_path = dest_dir.path().join("test-crate");

        SourceCollector::copy_crate_source(source_path, &dest_path, true)
            .expect("Failed to copy crate source");

        assert!(dest_path.join("src/lib.rs").exists(), "src/lib.rs should be copied");
        assert!(
            dest_path.join("Crate.toml").exists(),
            "Cargo.toml should be copied as Crate.toml"
        );
        assert!(
            !dest_path.join("Cargo.toml").exists(),
            "Cargo.toml should be renamed, not copied"
        );
        assert!(
            !dest_path.join("build.rs").exists(),
            "build.rs should NOT be copied in minimal mode"
        );
        assert!(
            !dest_path.join("benches").exists(),
            "benches/ should NOT be copied in minimal mode"
        );
        assert!(
            !dest_path.join("README.md").exists(),
            "README.md should NOT be copied in minimal mode"
        );
    }

    #[test]
    fn test_copy_crate_source_full_mode() {
        let source_dir = TempDir::new().expect("Failed to create temp dir");
        let source_path = source_dir.path();
        write_fake_crate(source_path);

        let dest_dir = TempDir::new().expect("Failed to create dest temp dir");
        let dest_path = dest_dir.path().join("test-crate");

        SourceCollector::copy_crate_source(source_path, &dest_path, false)
            .expect("Failed to copy crate source");

        assert!(dest_path.join("src/lib.rs").exists(), "src/lib.rs should be copied");
        assert!(
            dest_path.join("Crate.toml").exists(),
            "Cargo.toml should be copied as Crate.toml"
        );
        assert!(
            !dest_path.join("Cargo.toml").exists(),
            "Cargo.toml should be renamed, not duplicated"
        );
        assert!(
            dest_path.join("build.rs").exists(),
            "build.rs SHOULD be copied in full mode"
        );
        assert!(
            dest_path.join("benches/bench.rs").exists(),
            "benches/ SHOULD be copied in full mode"
        );
        assert!(
            dest_path.join("README.md").exists(),
            "README.md SHOULD be copied in full mode"
        );
    }
}