1use std::collections::{HashMap, HashSet};
7use std::io::Write;
8use std::path::{Path, PathBuf};
9use std::time::{SystemTime, UNIX_EPOCH};
10use std::{env as StdEnv, fs as StdFs};
11
12use cargo_metadata::{DependencyKind, Metadata, MetadataCommand, PackageId};
13use serde_json as SJSON;
14
15use crate::error::Error;
16
/// Manifest entry describing one dependency whose source was copied.
///
/// Instances are stored in [`SourceManifest::crates`] and serialized with serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CollectedCrate {
    /// Package name as reported by `cargo metadata`.
    pub name: String,

    /// Package version, rendered as a string.
    pub version: String,

    /// Rust edition declared by the package, rendered as a string.
    pub edition: String,

    /// Names of the features the package declares (the keys of its feature table).
    pub features: Vec<String>,

    /// Package description, when the package provides one.
    pub description: Option<String>,

    /// Name of the crate's directory inside the output directory
    /// (`<name>-<version>`, the same value used as the manifest key).
    pub source_path: String,
}
38
/// Summary of a collection run, written as `manifest.json` into the output directory.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SourceManifest {
    /// UTC timestamp of the run, formatted as `YYYY-MM-DDTHH:MM:SSZ`.
    pub collected_at: String,

    /// Workspace root reported by `cargo metadata`, as a string.
    pub workspace_root: String,

    /// Collected crates, keyed by `<name>-<version>`.
    pub crates: HashMap<String, CollectedCrate>,
}
51
/// Outcome of [`SourceCollector::collect`], for both real and dry runs.
#[derive(Debug)]
pub struct CollectionResult {
    /// Directory the sources were (or, for a dry run, would be) written to.
    pub output_dir: PathBuf,

    /// Number of crates whose registry source was found.
    pub crates_collected: usize,

    /// `<name>-<version>` of every considered package with no registry source.
    pub skipped: Vec<String>,
}
64
/// Options controlling [`SourceCollector::collect`].
///
/// The derived `Default` excludes dev-only packages, auto-generates the output
/// directory and performs a real (non-dry) run.
#[derive(Debug, Default)]
pub struct CollectOptions {
    /// When `true`, packages that are only reachable through dev-dependencies
    /// are collected too; when `false` they are left out.
    pub include_dev: bool,

    /// Explicit output directory. When `None`, a `.source_<timestamp>`
    /// directory under the workspace root is chosen.
    pub output: Option<PathBuf>,

    /// When `true`, only report what would be collected; nothing is written.
    pub dry_run: bool,
}
77
/// Copies the source of a workspace's dependencies out of Cargo's local
/// registry cache into a directory of its own.
#[derive(Debug)]
pub struct SourceCollector {
    /// Output of `cargo metadata` for the workspace being collected.
    metadata: Metadata,

    /// Root of Cargo's unpacked registry sources (`…/registry/src`); each
    /// immediate subdirectory is searched for a `<name>-<version>` folder.
    registry_path: PathBuf,
}
87
88impl SourceCollector {
89 pub fn new() -> Result<Self, Error> {
95 Self::from_manifest(None)
96 }
97
98 pub fn from_manifest(manifest_path: Option<&Path>) -> Result<Self, Error> {
104 let mut cmd = MetadataCommand::new();
105
106 if let Some(path) = manifest_path {
107 cmd.manifest_path(path);
108 }
109
110 let metadata = cmd
111 .exec()
112 .map_err(|e| Error::SourceCollector(format!("Failed to load cargo metadata: {e}")))?;
113
114 let home = StdEnv::var("HOME")
115 .or_else(|_| StdEnv::var("USERPROFILE"))
116 .map_err(|_| Error::SourceCollector("Could not determine home directory".into()))?;
117
118 let registry_path = PathBuf::from(home).join(".cargo/registry/src");
119
120 Ok(Self {
121 metadata,
122 registry_path,
123 })
124 }
125
126 pub fn collect(&self, options: &CollectOptions) -> Result<CollectionResult, Error> {
132 let output_dir = match &options.output {
134 Some(path) => path.clone(),
135 None => self.generate_output_dir()?,
136 };
137
138 if options.dry_run {
139 return self.dry_run_collect(&output_dir, options);
140 }
141
142 StdFs::create_dir_all(&output_dir)
144 .map_err(|e| Error::SourceCollector(format!("Failed to create output dir: {e}")))?;
145
146 let mut manifest = SourceManifest {
147 collected_at: TimeUtils::chrono_lite_now(),
148 workspace_root: self.metadata.workspace_root.to_string(),
149 crates: HashMap::new(),
150 };
151
152 let mut skipped = Vec::new();
153 let mut collected_count = 0;
154
155 let dev_only = if options.include_dev {
157 HashSet::new()
158 } else {
159 self.get_dev_only_packages()
160 };
161
162 for pkg in &self.metadata.packages {
164 if self.metadata.workspace_members.contains(&pkg.id) {
166 continue;
167 }
168
169 if dev_only.contains(&pkg.id) {
171 continue;
172 }
173
174 let version = pkg.version.to_string();
175 let key = format!("{}-{}", pkg.name, version);
176
177 match self.find_registry_source(&pkg.name, &version) {
179 Some(source_path) => {
180 let dest_dir = output_dir.join(&key);
181
182 Self::copy_crate_source(&source_path, &dest_dir)?;
184
185 manifest.crates.insert(
187 key.clone(),
188 CollectedCrate {
189 name: pkg.name.to_string(),
190 version: version.clone(),
191 edition: pkg.edition.to_string(),
192 features: pkg.features.keys().cloned().collect(),
193 description: pkg.description.clone(),
194 source_path: key,
195 },
196 );
197
198 collected_count += 1;
199 },
200 None => {
201 skipped.push(format!("{}-{}", pkg.name, version));
202 },
203 }
204 }
205
206 let manifest_path = output_dir.join("manifest.json");
208 let manifest_json = SJSON::to_string_pretty(&manifest)
209 .map_err(|e| Error::SourceCollector(format!("Failed to serialize manifest: {e}")))?;
210 StdFs::write(&manifest_path, manifest_json)
211 .map_err(|e| Error::SourceCollector(format!("Failed to write manifest: {e}")))?;
212
213 self.update_gitignore()?;
215
216 Ok(CollectionResult {
217 output_dir,
218 crates_collected: collected_count,
219 skipped,
220 })
221 }
222
223 fn generate_output_dir(&self) -> Result<PathBuf, Error> {
225 let workspace_root = self.metadata.workspace_root.as_std_path();
226 let timestamp = SystemTime::now()
227 .duration_since(UNIX_EPOCH)
228 .map_err(|e| Error::SourceCollector(format!("Failed to get timestamp: {e}")))?
229 .as_secs();
230
231 for i in 0..3 {
233 let dir_name = format!(".source_{}", timestamp + i);
234 let path = workspace_root.join(&dir_name);
235
236 if !path.exists() {
237 return Ok(path);
238 }
239 }
240
241 Err(Error::SourceCollector(
242 "Too many .source_* directories exist. Please clean up old ones.".into(),
243 ))
244 }
245
246 fn find_registry_source(&self, name: &str, version: &str) -> Option<PathBuf> {
248 if !self.registry_path.exists() {
249 return None;
250 }
251
252 let target_dir = format!("{name}-{version}");
253
254 for entry in StdFs::read_dir(&self.registry_path).ok()? {
256 let entry = entry.ok()?;
257 let index_path = entry.path();
258
259 if index_path.is_dir() {
260 let crate_path = index_path.join(&target_dir);
261
262 if crate_path.exists() && crate_path.is_dir() {
263 return Some(crate_path);
264 }
265 }
266 }
267
268 None
269 }
270
271 fn copy_crate_source(source: &Path, dest: &Path) -> Result<(), Error> {
273 StdFs::create_dir_all(dest)
274 .map_err(|e| Error::SourceCollector(format!("Failed to create dir: {e}")))?;
275
276 let src_dir = source.join("src");
278
279 if src_dir.exists() {
280 Self::copy_dir_recursive(&src_dir, &dest.join("src"))?;
281 }
282
283 let cargo_toml = source.join("Cargo.toml");
285 if cargo_toml.exists() {
286 StdFs::copy(&cargo_toml, dest.join("Crate.toml"))
287 .map_err(|e| Error::SourceCollector(format!("Failed to copy Cargo.toml: {e}")))?;
288 }
289
290 Ok(())
291 }
292
293 fn get_dev_only_packages(&self) -> HashSet<PackageId> {
298 let Some(resolve) = &self.metadata.resolve else {
299 return HashSet::new();
300 };
301
302 let nodes: HashMap<&PackageId, _> =
304 resolve.nodes.iter().map(|node| (&node.id, node)).collect();
305
306 let mut non_dev_reachable: HashSet<PackageId> = HashSet::new();
308 let mut to_visit: Vec<&PackageId> = self.metadata.workspace_members.iter().collect();
309
310 while let Some(pkg_id) = to_visit.pop() {
311 if let Some(node) = nodes.get(pkg_id) {
312 for dep in &node.deps {
313 let has_non_dev = dep
315 .dep_kinds
316 .iter()
317 .any(|dk| !matches!(dk.kind, DependencyKind::Development));
318
319 if has_non_dev && non_dev_reachable.insert(dep.pkg.clone()) {
320 to_visit.push(&dep.pkg);
321 }
322 }
323 }
324 }
325
326 self.metadata
329 .packages
330 .iter()
331 .filter(|pkg| {
332 !self.metadata.workspace_members.contains(&pkg.id)
333 && !non_dev_reachable.contains(&pkg.id)
334 })
335 .map(|pkg| pkg.id.clone())
336 .collect()
337 }
338
339 #[expect(clippy::unnecessary_wraps, reason = "Not really")]
341 fn dry_run_collect(
342 &self,
343 output_dir: &Path,
344 options: &CollectOptions,
345 ) -> Result<CollectionResult, Error> {
346 let mut skipped = Vec::new();
347 let mut collected_count = 0;
348
349 let dev_only = if options.include_dev {
351 HashSet::new()
352 } else {
353 self.get_dev_only_packages()
354 };
355
356 for pkg in &self.metadata.packages {
357 if self.metadata.workspace_members.contains(&pkg.id) {
358 continue;
359 }
360
361 if dev_only.contains(&pkg.id) {
363 continue;
364 }
365
366 let version = pkg.version.to_string();
367
368 if self.find_registry_source(&pkg.name, &version).is_some() {
369 collected_count += 1;
370 } else {
371 skipped.push(format!("{}-{}", pkg.name, version));
372 }
373 }
374
375 Ok(CollectionResult {
376 output_dir: output_dir.to_path_buf(),
377 crates_collected: collected_count,
378 skipped,
379 })
380 }
381
382 fn update_gitignore(&self) -> Result<(), Error> {
384 let gitignore_path = self.metadata.workspace_root.join(".gitignore");
385 let pattern = ".source_*";
386
387 let content = StdFs::read_to_string(&gitignore_path).unwrap_or_default();
389
390 if content.lines().any(|line| line.trim() == pattern) {
392 return Ok(());
393 }
394
395 let mut file = StdFs::OpenOptions::new()
397 .create(true)
398 .append(true)
399 .open(&gitignore_path)
400 .map_err(|e| Error::SourceCollector(format!("Failed to open .gitignore: {e}")))?;
401
402 if !content.is_empty() && !content.ends_with('\n') {
404 writeln!(file).map_err(|e| {
405 Error::SourceCollector(format!("Failed to write to .gitignore: {e}"))
406 })?;
407 }
408
409 writeln!(file, "{pattern}")
410 .map_err(|e| Error::SourceCollector(format!("Failed to write to .gitignore: {e}")))?;
411
412 Ok(())
413 }
414
415 #[must_use]
417 pub fn list_dependencies(&self) -> Vec<(&str, &str)> {
418 self.metadata
419 .packages
420 .iter()
421 .filter(|pkg| !self.metadata.workspace_members.contains(&pkg.id))
422 .map(|pkg| (pkg.name.as_str(), pkg.version.to_string().leak() as &str))
423 .collect()
424 }
425
426 fn copy_dir_recursive(src: &Path, dest: &Path) -> Result<(), Error> {
428 StdFs::create_dir_all(dest).map_err(|e| {
429 Error::SourceCollector(format!("Failed to create dir {}: {e}", dest.display()))
430 })?;
431
432 for entry in StdFs::read_dir(src).map_err(|e| {
433 Error::SourceCollector(format!("Failed to read dir {}: {e}", src.display()))
434 })? {
435 let entry =
436 entry.map_err(|e| Error::SourceCollector(format!("Failed to read entry: {e}")))?;
437 let path = entry.path();
438 let dest_path = dest.join(entry.file_name());
439
440 if path.is_dir() {
441 Self::copy_dir_recursive(&path, &dest_path)?;
442 } else {
443 StdFs::copy(&path, &dest_path).map_err(|e| {
444 Error::SourceCollector(format!(
445 "Failed to copy {} to {}: {e}",
446 path.display(),
447 dest_path.display()
448 ))
449 })?;
450 }
451 }
452
453 Ok(())
454 }
455}
456
/// Namespace for the hand-rolled UTC timestamp helpers used by this module.
struct TimeUtils;

impl TimeUtils {
    /// Returns the current system time as `YYYY-MM-DDTHH:MM:SSZ`.
    ///
    /// Only whole seconds since the Unix epoch are used; a clock set before
    /// the epoch is treated as zero seconds.
    fn chrono_lite_now() -> String {
        let elapsed_secs = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();

        Self::format_epoch_secs(elapsed_secs)
    }

    /// Formats a count of seconds since the Unix epoch as a UTC timestamp.
    fn format_epoch_secs(epoch_secs: u64) -> String {
        let clock_secs = epoch_secs % 86400;
        let hours = clock_secs / 3600;
        let minutes = (clock_secs % 3600) / 60;
        let seconds = clock_secs % 60;

        // Peel off whole years, then whole months, from the day count.
        let mut days_left = epoch_secs / 86400;
        let mut year: u64 = 1970;
        let year_length = |y: u64| -> u64 { if Self::is_leap_year(y) { 366 } else { 365 } };

        while days_left >= year_length(year) {
            days_left -= year_length(year);
            year += 1;
        }

        let february = if Self::is_leap_year(year) { 29 } else { 28 };
        let month_lengths: [u64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

        let mut month = 1;
        for length in month_lengths {
            if days_left < length {
                break;
            }
            days_left -= length;
            month += 1;
        }

        // Days are 1-based; `days_left` is the 0-based offset into the month.
        let day = days_left + 1;

        format!("{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}Z")
    }

    /// Gregorian leap-year rule: every 400th year, or every 4th year that is
    /// not a century.
    const fn is_leap_year(year: u64) -> bool {
        year.is_multiple_of(400) || (year.is_multiple_of(4) && !year.is_multiple_of(100))
    }
}
515
#[cfg(test)]
mod tests {
    use super::{SourceCollector, TimeUtils};

    /// Builds a collector for the workspace the tests run in.
    fn workspace_collector() -> SourceCollector {
        SourceCollector::new().expect("Failed to create collector")
    }

    /// Names of every package the collector classifies as dev-only.
    fn dev_only_names(collector: &SourceCollector) -> Vec<&str> {
        let dev_only = collector.get_dev_only_packages();

        collector
            .metadata
            .packages
            .iter()
            .filter(|pkg| dev_only.contains(&pkg.id))
            .map(|pkg| pkg.name.as_str())
            .collect()
    }

    /// The timestamp looks like an ISO-8601 UTC instant in the 2000s.
    #[test]
    fn test_chrono_lite_now() {
        let ts = TimeUtils::chrono_lite_now();

        assert!(ts.contains('T'));
        assert!(ts.ends_with('Z'));
        assert!(ts.starts_with("20"));
    }

    /// Covers the 4 / 100 / 400 year rules.
    #[test]
    fn test_is_leap_year() {
        assert!(TimeUtils::is_leap_year(2000));
        assert!(TimeUtils::is_leap_year(2024));
        assert!(!TimeUtils::is_leap_year(1900));
        assert!(!TimeUtils::is_leap_year(2023));
    }

    /// Packages used only as dev-dependencies of this workspace are reported.
    #[test]
    fn test_get_dev_only_packages_detects_dev_deps() {
        let collector = workspace_collector();
        let dev_only_names = dev_only_names(&collector);

        assert!(
            dev_only_names.contains(&"insta"),
            "insta should be detected as dev-only, got: {dev_only_names:?}"
        );
        assert!(
            dev_only_names.contains(&"divan"),
            "divan should be detected as dev-only, got: {dev_only_names:?}"
        );
    }

    /// Packages reachable through normal dependencies are never dev-only.
    #[test]
    fn test_get_dev_only_packages_excludes_normal_deps() {
        let collector = workspace_collector();
        let dev_only_names = dev_only_names(&collector);

        assert!(
            !dev_only_names.contains(&"serde"),
            "serde should NOT be dev-only"
        );
        assert!(
            !dev_only_names.contains(&"clap"),
            "clap should NOT be dev-only"
        );
        assert!(
            !dev_only_names.contains(&"syn"),
            "syn should NOT be dev-only"
        );
        assert!(
            !dev_only_names.contains(&"tracing"),
            "tracing should NOT be dev-only (it's also a normal dependency)"
        );
    }

    /// Without a resolve graph nothing can be classified as dev-only.
    #[test]
    fn test_get_dev_only_packages_with_no_resolve() {
        let mut collector = workspace_collector();
        collector.metadata.resolve = None;

        let dev_only = collector.get_dev_only_packages();

        assert!(
            dev_only.is_empty(),
            "Should return empty set when no resolve graph"
        );
    }

    /// The dependency listing leaves out the workspace's own crate.
    #[test]
    fn test_list_dependencies_excludes_workspace_members() {
        let collector = workspace_collector();
        let deps = collector.list_dependencies();
        let dep_names: Vec<&str> = deps.iter().map(|&(name, _)| name).collect();

        assert!(
            !dep_names.contains(&"cargo-docs-md"),
            "Should not include workspace member"
        );
        assert!(
            dep_names.contains(&"serde"),
            "Should include serde dependency"
        );
    }
}