Skip to main content

provenant/models/
datasource_id.rs

1//! Datasource identifiers for package parsers.
2//!
3//! Each variant uniquely identifies the type of package data source (file format)
4//! that was parsed. These IDs enable the assembly system to intelligently merge
5//! related package files.
6
7use serde::{Deserialize, Serialize};
8use std::fmt;
9use strum::{EnumCount, EnumIter};
10
/// Unique identifier for the type of package data source (file format).
///
/// Datasource IDs distinguish between different file types within the same ecosystem
/// (e.g., `NpmPackageJson` vs `NpmPackageLockJson`). The assembly system uses these
/// IDs to match packages from related files for merging into a single logical package.
///
/// # Serialization
///
/// Variants serialize to snake_case strings matching the Python reference values.
/// The JSON output is identical to the Python ScanCode Toolkit.
///
/// The container-level `rename_all = "snake_case"` supplies the default name for
/// every variant; a variant-level `#[serde(rename = "...")]` pins the exact string
/// where one is given.
///
/// # Examples
///
/// ```ignore
/// use provenant::models::DatasourceId;
///
/// let id = DatasourceId::NpmPackageJson;
/// assert_eq!(id.as_ref(), "npm_package_json");
/// assert_eq!(id.to_string(), "npm_package_json");
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, EnumCount, EnumIter)]
#[serde(rename_all = "snake_case")]
pub enum DatasourceId {
    // ── About/README/OS ──
    AboutFile,
    Readme,
    EtcOsRelease,

    // ── Alpine ──
    AlpineApkArchive,
    AlpineApkbuild,
    AlpineInstalledDb,

    // ── Arch Linux ──
    ArchAurinfo,
    ArchPkginfo,
    ArchSrcinfo,

    // ── Android ──
    AndroidAarLibrary,
    AndroidApk,

    // ── Apache Axis2 ──
    Axis2Mar,
    Axis2ModuleXml,

    // ── Autotools ──
    AutotoolsConfigure,

    // ── Bazel ──
    BazelBuild,
    BazelModule,

    // ── Bower ──
    BowerJson,

    // ── Buck ──
    /// Matches Python reference value; the explicit rename pins it to `buck_file`.
    #[serde(rename = "buck_file")]
    BuckFile,
    /// Matches Python reference value; the explicit rename pins it to `buck_metadata`.
    #[serde(rename = "buck_metadata")]
    BuckMetadata,

    // ── Bun ──
    BunLock,
    BunLockb,

    // ── Cargo/Rust ──
    CargoLock,
    CargoToml,

    // ── Chef ──
    /// Matches Python reference value.
    #[serde(rename = "chef_cookbook_metadata_json")]
    ChefCookbookMetadataJson,
    /// Matches Python reference value.
    #[serde(rename = "chef_cookbook_metadata_rb")]
    ChefCookbookMetadataRb,

    // ── CocoaPods ──
    CocoapodsPodfile,
    CocoapodsPodfileLock,
    CocoapodsPodspec,
    CocoapodsPodspecJson,

    // ── Conan ──
    // Explicit renames keep `conandata` / `conanfile` as single words in the
    // serialized names.
    #[serde(rename = "conan_conandata_yml")]
    ConanConanDataYml,
    #[serde(rename = "conan_conanfile_py")]
    ConanConanFilePy,
    #[serde(rename = "conan_conanfile_txt")]
    ConanConanFileTxt,
    ConanLock,

    // ── Conda ──
    /// Matches Python reference value.
    #[serde(rename = "conda_yaml")]
    CondaYaml,
    CondaMetaJson,
    CondaMetaYaml,

    // ── Clojure ──
    ClojureDepsEdn,
    ClojureProjectClj,

    // ── CPAN/Perl ──
    CpanDistIni,
    /// Matches Python reference value.
    #[serde(rename = "cpan_makefile")]
    CpanMakefile,
    CpanManifest,
    CpanMetaJson,
    CpanMetaYml,

    // ── CRAN/R ──
    CranDescription,

    // ── Dart/Flutter ──
    PubspecLock,
    PubspecYaml,

    // ── Debian ──
    DebianControlExtractedDeb,
    DebianControlInSource,
    DebianCopyright,
    DebianDeb,
    /// Matches Python reference value.
    #[serde(rename = "debian_source_metadata_tarball")]
    DebianSourceMetadataTarball,
    DebianDistrolessInstalledDb,
    /// Matches Python reference value.
    #[serde(rename = "debian_installed_files_list")]
    DebianInstalledFilesList,
    // Explicit rename keeps `md5sums` as a single word in the serialized name.
    #[serde(rename = "debian_installed_md5sums")]
    DebianInstalledMd5Sums,
    DebianInstalledStatusDb,
    // Explicit rename keeps `md5sums` as a single word in the serialized name.
    #[serde(rename = "debian_md5sums_in_extracted_deb")]
    DebianMd5SumsInExtractedDeb,
    /// Matches Python reference value.
    #[serde(rename = "debian_original_source_tarball")]
    DebianOriginalSourceTarball,
    DebianSourceControlDsc,

    // ── Deno ──
    DenoJson,
    DenoLock,

    // ── Docker ──
    Dockerfile,

    // ── FreeBSD ──
    FreebsdCompactManifest,

    // ── Go ──
    Godeps,
    GoMod,
    GoModGraph,
    GoSum,
    GoWork,

    // ── Haskell / Hackage ──
    HackageCabal,
    HackageCabalProject,
    HackageStackYaml,

    // ── Gradle ──
    BuildGradle,
    GradleLockfile,
    GradleModule,

    // ── Haxe ──
    HaxelibJson,

    // ── Helm ──
    HelmChartLock,
    HelmChartYaml,

    // ── Hex/Elixir ──
    HexMixLock,

    // ── Java ──
    AntIvyXml,
    JavaEarApplicationXml,
    JavaEarArchive,
    JavaJar,
    JavaJarManifest,
    JavaOsgiManifest,
    JavaWarArchive,
    JavaWarWebXml,
    JbossSar,
    JbossServiceXml,

    // ── Maven ──
    MavenPom,
    MavenPomProperties,
    // ── Meson ──
    MesonBuild,

    // ── sbt ──
    SbtBuildSbt,

    // ── Microsoft ──
    MicrosoftCabinet,
    MicrosoftUpdateManifestMum,

    // ── Mobile/Browser ──
    AppleDmg,
    ChromeCrx,
    IosIpa,
    MozillaXpi,

    // ── Meteor ──
    MeteorPackage,

    // ── npm ──
    NpmPackageJson,
    NpmPackageLockJson,

    // ── NuGet ──
    NugetCsproj,
    NugetDepsJson,
    NugetDirectoryBuildProps,
    NugetDirectoryPackagesProps,
    NugetNupkg,
    NugetProjectJson,
    NugetProjectLockJson,
    NugetPackagesConfig,
    NugetPackagesLock,
    /// Serializes to `"nuget_nupsec"` to match Python reference value (typo in original).
    #[serde(rename = "nuget_nupsec")]
    NugetNuspec,
    NugetVbproj,
    NugetFsproj,

    // ── OCaml/opam ──
    OpamFile,

    // ── PHP/Composer ──
    PhpComposerJson,
    PhpComposerLock,

    // ── pnpm ──
    PnpmLockYaml,
    PnpmWorkspaceYaml,

    // ── Python/PyPI ──
    Pipfile,
    PipfileLock,
    PipRequirements,
    PixiLock,
    PixiToml,
    PypiPipOriginJson,
    PypiEgg,
    PypiInspectDeplock,
    PypiJson,
    PypiPoetryLock,
    PypiPylockToml,
    PypiPyprojectToml,
    PypiSdistPkginfo,
    PypiSetupCfg,
    PypiSetupPy,
    PypiUvLock,
    PypiWheel,
    PypiWheelMetadata,

    // ── RPM ──
    RpmArchive,
    RpmInstalledDatabaseBdb,
    RpmInstalledDatabaseNdb,
    RpmInstalledDatabaseSqlite,
    RpmMarinerManifest,
    RpmPackageLicenses,
    /// Serializes to `"rpm_spefile"` to match Python reference value (typo in original).
    #[serde(rename = "rpm_spefile")]
    RpmSpecfile,
    RpmYumdb,

    // ── Ruby/RubyGems ──
    Gemfile,
    GemfileLock,
    GemArchive,
    /// Matches Python reference value.
    #[serde(rename = "gem_archive_extracted")]
    GemArchiveExtracted,
    Gemspec,

    // ── Disk Images/Installers ──
    InstallshieldInstaller,
    IsoDiskImage,
    NsisInstaller,
    SharShellArchive,
    SquashfsDiskImage,

    // ── Swift ──
    SwiftPackageManifestJson,
    SwiftPackageResolved,
    SwiftPackageShowDependencies,

    // ── vcpkg ──
    VcpkgJson,

    // ── Yarn ──
    YarnLock,

    // ── Git ──
    Gitmodules,
}
317
318impl DatasourceId {
319    /// Returns the string representation of this datasource ID.
320    ///
321    /// This matches the serialized form used in JSON output.
322    pub fn as_str(&self) -> &'static str {
323        match self {
324            // About/README/OS
325            Self::AboutFile => "about_file",
326            Self::Readme => "readme",
327            Self::EtcOsRelease => "etc_os_release",
328
329            // Alpine
330            Self::AlpineApkArchive => "alpine_apk_archive",
331            Self::AlpineApkbuild => "alpine_apkbuild",
332            Self::AlpineInstalledDb => "alpine_installed_db",
333
334            // Arch Linux
335            Self::ArchAurinfo => "arch_aurinfo",
336            Self::ArchPkginfo => "arch_pkginfo",
337            Self::ArchSrcinfo => "arch_srcinfo",
338
339            // Android
340            Self::AndroidAarLibrary => "android_aar_library",
341            Self::AndroidApk => "android_apk",
342
343            // Apache Axis2
344            Self::Axis2Mar => "axis2_mar",
345            Self::Axis2ModuleXml => "axis2_module_xml",
346
347            // Autotools
348            Self::AutotoolsConfigure => "autotools_configure",
349
350            // Bazel
351            Self::BazelBuild => "bazel_build",
352
353            // Bower
354            Self::BowerJson => "bower_json",
355
356            // Buck
357            Self::BuckFile => "buck_file",
358            Self::BuckMetadata => "buck_metadata",
359
360            // Cargo/Rust
361            Self::CargoLock => "cargo_lock",
362            Self::CargoToml => "cargo_toml",
363
364            // Chef
365            Self::ChefCookbookMetadataJson => "chef_cookbook_metadata_json",
366            Self::ChefCookbookMetadataRb => "chef_cookbook_metadata_rb",
367
368            // CocoaPods
369            Self::CocoapodsPodfile => "cocoapods_podfile",
370            Self::CocoapodsPodfileLock => "cocoapods_podfile_lock",
371            Self::CocoapodsPodspec => "cocoapods_podspec",
372            Self::CocoapodsPodspecJson => "cocoapods_podspec_json",
373
374            // Conan
375            Self::ConanConanDataYml => "conan_conandata_yml",
376            Self::ConanConanFilePy => "conan_conanfile_py",
377            Self::ConanConanFileTxt => "conan_conanfile_txt",
378            Self::ConanLock => "conan_lock",
379
380            // Conda
381            Self::CondaYaml => "conda_yaml",
382            Self::CondaMetaJson => "conda_meta_json",
383            Self::CondaMetaYaml => "conda_meta_yaml",
384
385            // Clojure
386            Self::ClojureDepsEdn => "clojure_deps_edn",
387            Self::ClojureProjectClj => "clojure_project_clj",
388
389            // CPAN/Perl
390            Self::CpanDistIni => "cpan_dist_ini",
391            Self::CpanMakefile => "cpan_makefile",
392            Self::CpanManifest => "cpan_manifest",
393            Self::CpanMetaJson => "cpan_meta_json",
394            Self::CpanMetaYml => "cpan_meta_yml",
395
396            // CRAN/R
397            Self::CranDescription => "cran_description",
398
399            // Dart/Flutter
400            Self::PubspecLock => "pubspec_lock",
401            Self::PubspecYaml => "pubspec_yaml",
402
403            // Debian
404            Self::DebianControlExtractedDeb => "debian_control_extracted_deb",
405            Self::DebianControlInSource => "debian_control_in_source",
406            Self::DebianCopyright => "debian_copyright",
407            Self::DebianDeb => "debian_deb",
408            Self::DebianSourceMetadataTarball => "debian_source_metadata_tarball",
409            Self::DebianDistrolessInstalledDb => "debian_distroless_installed_db",
410            Self::DebianInstalledFilesList => "debian_installed_files_list",
411            Self::DebianInstalledMd5Sums => "debian_installed_md5sums",
412            Self::DebianInstalledStatusDb => "debian_installed_status_db",
413            Self::DebianMd5SumsInExtractedDeb => "debian_md5sums_in_extracted_deb",
414            Self::DebianOriginalSourceTarball => "debian_original_source_tarball",
415            Self::DebianSourceControlDsc => "debian_source_control_dsc",
416            Self::DenoJson => "deno_json",
417            Self::DenoLock => "deno_lock",
418            Self::Dockerfile => "dockerfile",
419            Self::BazelModule => "bazel_module",
420
421            // FreeBSD
422            Self::FreebsdCompactManifest => "freebsd_compact_manifest",
423
424            // Go
425            Self::Godeps => "godeps",
426            Self::GoMod => "go_mod",
427            Self::GoModGraph => "go_mod_graph",
428            Self::GoSum => "go_sum",
429            Self::GoWork => "go_work",
430
431            // Haskell / Hackage
432            Self::HackageCabal => "hackage_cabal",
433            Self::HackageCabalProject => "hackage_cabal_project",
434            Self::HackageStackYaml => "hackage_stack_yaml",
435
436            // Gradle
437            Self::BuildGradle => "build_gradle",
438            Self::GradleLockfile => "gradle_lockfile",
439            Self::GradleModule => "gradle_module",
440
441            // Haxe
442            Self::HaxelibJson => "haxelib_json",
443
444            // Helm
445            Self::HelmChartLock => "helm_chart_lock",
446            Self::HelmChartYaml => "helm_chart_yaml",
447
448            // Hex/Elixir
449            Self::HexMixLock => "hex_mix_lock",
450
451            // Java
452            Self::AntIvyXml => "ant_ivy_xml",
453            Self::JavaEarApplicationXml => "java_ear_application_xml",
454            Self::JavaEarArchive => "java_ear_archive",
455            Self::JavaJar => "java_jar",
456            Self::JavaJarManifest => "java_jar_manifest",
457            Self::JavaOsgiManifest => "java_osgi_manifest",
458            Self::JavaWarArchive => "java_war_archive",
459            Self::JavaWarWebXml => "java_war_web_xml",
460            Self::JbossSar => "jboss_sar",
461            Self::JbossServiceXml => "jboss_service_xml",
462
463            // Maven
464            Self::MavenPom => "maven_pom",
465            Self::MavenPomProperties => "maven_pom_properties",
466            Self::MesonBuild => "meson_build",
467            Self::SbtBuildSbt => "sbt_build_sbt",
468
469            // Microsoft
470            Self::MicrosoftCabinet => "microsoft_cabinet",
471            Self::MicrosoftUpdateManifestMum => "microsoft_update_manifest_mum",
472
473            // Mobile/Browser
474            Self::AppleDmg => "apple_dmg",
475            Self::ChromeCrx => "chrome_crx",
476            Self::IosIpa => "ios_ipa",
477            Self::MozillaXpi => "mozilla_xpi",
478
479            // Meteor
480            Self::MeteorPackage => "meteor_package",
481
482            // npm
483            Self::BunLock => "bun_lock",
484            Self::BunLockb => "bun_lockb",
485            Self::NpmPackageJson => "npm_package_json",
486            Self::NpmPackageLockJson => "npm_package_lock_json",
487
488            // NuGet
489            Self::NugetCsproj => "nuget_csproj",
490            Self::NugetDepsJson => "nuget_deps_json",
491            Self::NugetDirectoryBuildProps => "nuget_directory_build_props",
492            Self::NugetDirectoryPackagesProps => "nuget_directory_packages_props",
493            Self::NugetNupkg => "nuget_nupkg",
494            Self::NugetProjectJson => "nuget_project_json",
495            Self::NugetProjectLockJson => "nuget_project_lock_json",
496            Self::NugetPackagesConfig => "nuget_packages_config",
497            Self::NugetPackagesLock => "nuget_packages_lock",
498            Self::NugetNuspec => "nuget_nupsec",
499            Self::NugetVbproj => "nuget_vbproj",
500            Self::NugetFsproj => "nuget_fsproj",
501
502            // OCaml/opam
503            Self::OpamFile => "opam_file",
504
505            // PHP/Composer
506            Self::PhpComposerJson => "php_composer_json",
507            Self::PhpComposerLock => "php_composer_lock",
508
509            // pnpm
510            Self::PnpmLockYaml => "pnpm_lock_yaml",
511            Self::PnpmWorkspaceYaml => "pnpm_workspace_yaml",
512
513            // Python/PyPI
514            Self::Pipfile => "pipfile",
515            Self::PipfileLock => "pipfile_lock",
516            Self::PipRequirements => "pip_requirements",
517            Self::PixiLock => "pixi_lock",
518            Self::PixiToml => "pixi_toml",
519            Self::PypiPipOriginJson => "pypi_pip_origin_json",
520            Self::PypiEgg => "pypi_egg",
521            Self::PypiInspectDeplock => "pypi_inspect_deplock",
522            Self::PypiJson => "pypi_json",
523            Self::PypiPoetryLock => "pypi_poetry_lock",
524            Self::PypiPylockToml => "pypi_pylock_toml",
525            Self::PypiPyprojectToml => "pypi_pyproject_toml",
526            Self::PypiSdistPkginfo => "pypi_sdist_pkginfo",
527            Self::PypiSetupCfg => "pypi_setup_cfg",
528            Self::PypiSetupPy => "pypi_setup_py",
529            Self::PypiUvLock => "pypi_uv_lock",
530            Self::PypiWheel => "pypi_wheel",
531            Self::PypiWheelMetadata => "pypi_wheel_metadata",
532
533            // RPM
534            Self::RpmArchive => "rpm_archive",
535            Self::RpmInstalledDatabaseBdb => "rpm_installed_database_bdb",
536            Self::RpmInstalledDatabaseNdb => "rpm_installed_database_ndb",
537            Self::RpmInstalledDatabaseSqlite => "rpm_installed_database_sqlite",
538            Self::RpmMarinerManifest => "rpm_mariner_manifest",
539            Self::RpmPackageLicenses => "rpm_package_licenses",
540            Self::RpmSpecfile => "rpm_spefile",
541            Self::RpmYumdb => "rpm_yumdb",
542
543            // Ruby/RubyGems
544            Self::Gemfile => "gemfile",
545            Self::GemfileLock => "gemfile_lock",
546            Self::GemArchive => "gem_archive",
547            Self::GemArchiveExtracted => "gem_archive_extracted",
548            Self::Gemspec => "gemspec",
549
550            // Disk Images/Installers
551            Self::InstallshieldInstaller => "installshield_installer",
552            Self::IsoDiskImage => "iso_disk_image",
553            Self::NsisInstaller => "nsis_installer",
554            Self::SharShellArchive => "shar_shell_archive",
555            Self::SquashfsDiskImage => "squashfs_disk_image",
556
557            // Swift
558            Self::SwiftPackageManifestJson => "swift_package_manifest_json",
559            Self::SwiftPackageResolved => "swift_package_resolved",
560            Self::SwiftPackageShowDependencies => "swift_package_show_dependencies",
561
562            // vcpkg
563            Self::VcpkgJson => "vcpkg_json",
564
565            // Yarn
566            Self::YarnLock => "yarn_lock",
567
568            // Git
569            Self::Gitmodules => "gitmodules",
570        }
571    }
572}
573
574impl AsRef<str> for DatasourceId {
575    fn as_ref(&self) -> &str {
576        self.as_str()
577    }
578}
579
580impl fmt::Display for DatasourceId {
581    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
582        f.write_str(self.as_str())
583    }
584}
585
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// A single variant serializes to its quoted snake_case name.
    #[test]
    fn test_serialization() {
        let id = DatasourceId::NpmPackageJson;
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(json, r#""npm_package_json""#);
    }

    /// The quoted snake_case name deserializes back to the variant.
    #[test]
    fn test_deserialization() {
        let json = r#""npm_package_json""#;
        let id: DatasourceId = serde_json::from_str(json).unwrap();
        assert_eq!(id, DatasourceId::NpmPackageJson);
    }

    #[test]
    fn test_as_str() {
        assert_eq!(DatasourceId::NpmPackageJson.as_str(), "npm_package_json");
        assert_eq!(DatasourceId::CargoLock.as_str(), "cargo_lock");
        assert_eq!(
            DatasourceId::PypiPyprojectToml.as_str(),
            "pypi_pyproject_toml"
        );
        assert_eq!(DatasourceId::HackageCabal.as_str(), "hackage_cabal");
    }

    #[test]
    fn test_display() {
        assert_eq!(DatasourceId::NpmPackageJson.to_string(), "npm_package_json");
    }

    #[test]
    fn test_as_ref() {
        let id = DatasourceId::NpmPackageJson;
        let s: &str = id.as_ref();
        assert_eq!(s, "npm_package_json");
    }

    #[test]
    fn test_python_rename_mappings() {
        // Test the ~12 IDs that changed from our old values to match Python
        assert_eq!(DatasourceId::BuckFile.as_str(), "buck_file");
        assert_eq!(DatasourceId::BuckMetadata.as_str(), "buck_metadata");
        assert_eq!(
            DatasourceId::ChefCookbookMetadataJson.as_str(),
            "chef_cookbook_metadata_json"
        );
        assert_eq!(
            DatasourceId::ChefCookbookMetadataRb.as_str(),
            "chef_cookbook_metadata_rb"
        );
        assert_eq!(DatasourceId::CondaYaml.as_str(), "conda_yaml");
        assert_eq!(DatasourceId::CpanMakefile.as_str(), "cpan_makefile");
        assert_eq!(
            DatasourceId::DebianInstalledFilesList.as_str(),
            "debian_installed_files_list"
        );
        assert_eq!(
            DatasourceId::DebianOriginalSourceTarball.as_str(),
            "debian_original_source_tarball"
        );
        assert_eq!(
            DatasourceId::DebianSourceMetadataTarball.as_str(),
            "debian_source_metadata_tarball"
        );
        assert_eq!(
            DatasourceId::GemArchiveExtracted.as_str(),
            "gem_archive_extracted"
        );
        assert_eq!(DatasourceId::NugetNuspec.as_str(), "nuget_nupsec");
        assert_eq!(DatasourceId::RpmSpecfile.as_str(), "rpm_spefile");
    }

    /// `as_str()` is a hand-written copy of the serde names, so check every
    /// variant: the serialized JSON string, `as_str()`, `Display` and the
    /// deserialization round trip must all agree.
    #[test]
    fn test_as_str_matches_serde_for_every_variant() {
        for id in DatasourceId::iter() {
            let json = serde_json::to_string(&id).unwrap();
            assert_eq!(
                json,
                format!("\"{}\"", id.as_str()),
                "serde name and as_str() disagree for {:?}",
                id
            );
            assert_eq!(id.to_string(), id.as_str());

            let round_tripped: DatasourceId = serde_json::from_str(&json).unwrap();
            assert_eq!(round_tripped, id);
        }
    }

    /// No two variants may share a string, otherwise the identifiers stop
    /// being unique.
    #[test]
    fn test_as_str_values_are_unique() {
        let names: HashSet<&'static str> = DatasourceId::iter().map(|id| id.as_str()).collect();
        assert_eq!(names.len(), DatasourceId::iter().count());
    }
}