Skip to main content

provenant/models/
datasource_id.rs

//! Datasource identifiers for package parsers.
//!
//! Each variant uniquely identifies the type of package data source (file format)
//! that was parsed. These IDs enable the assembly system to intelligently merge
//! related package files.
6
7use serde::{Deserialize, Serialize};
8use std::fmt;
9use strum::{EnumCount, EnumIter};
10
11/// Unique identifier for the type of package data source (file format).
12///
13/// Datasource IDs distinguish between different file types within the same ecosystem
14/// (e.g., `NpmPackageJson` vs `NpmPackageLockJson`). The assembly system uses these
15/// IDs to match packages from related files for merging into a single logical package.
16///
17/// # Serialization
18///
19/// Variants serialize to snake_case strings matching the Python reference values.
20/// The JSON output is identical to the Python ScanCode Toolkit.
21///
22/// # Examples
23///
24/// ```ignore
25/// use provenant::models::DatasourceId;
26///
27/// let id = DatasourceId::NpmPackageJson;
28/// assert_eq!(id.as_ref(), "npm_package_json");
29/// assert_eq!(id.to_string(), "npm_package_json");
30/// ```
31#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, EnumCount, EnumIter)]
32#[serde(rename_all = "snake_case")]
33pub enum DatasourceId {
34    // ── About/README/OS ──
35    AboutFile,
36    Readme,
37    EtcOsRelease,
38
39    // ── Alpine ──
40    AlpineApkArchive,
41    AlpineApkbuild,
42    AlpineInstalledDb,
43
44    // ── Arch Linux ──
45    ArchAurinfo,
46    ArchPkginfo,
47    ArchSrcinfo,
48
49    // ── Android ──
50    AndroidAarLibrary,
51    AndroidApk,
52
53    // ── Apache Axis2 ──
54    Axis2Mar,
55    Axis2ModuleXml,
56
57    // ── Autotools ──
58    AutotoolsConfigure,
59
60    // ── Bazel ──
61    BazelBuild,
62    BazelModule,
63
64    // ── Bower ──
65    BowerJson,
66
67    // ── Buck ──
68    /// Matches Python reference value. More consistent name would be `buck_file`.
69    #[serde(rename = "buck_file")]
70    BuckFile,
71    /// Matches Python reference value. More consistent name would be `buck_metadata`.
72    #[serde(rename = "buck_metadata")]
73    BuckMetadata,
74
75    // ── Bun ──
76    BunLock,
77    BunLockb,
78
79    // ── Cargo/Rust ──
80    CargoLock,
81    CargoToml,
82
83    // ── Chef ──
84    /// Matches Python reference value.
85    #[serde(rename = "chef_cookbook_metadata_json")]
86    ChefCookbookMetadataJson,
87    /// Matches Python reference value.
88    #[serde(rename = "chef_cookbook_metadata_rb")]
89    ChefCookbookMetadataRb,
90
91    // ── CocoaPods ──
92    CocoapodsPodfile,
93    CocoapodsPodfileLock,
94    CocoapodsPodspec,
95    CocoapodsPodspecJson,
96
97    // ── Conan ──
98    #[serde(rename = "conan_conandata_yml")]
99    ConanConanDataYml,
100    #[serde(rename = "conan_conanfile_py")]
101    ConanConanFilePy,
102    #[serde(rename = "conan_conanfile_txt")]
103    ConanConanFileTxt,
104    ConanLock,
105
106    // ── Conda ──
107    /// Matches Python reference value.
108    #[serde(rename = "conda_yaml")]
109    CondaYaml,
110    CondaMetaJson,
111    CondaMetaYaml,
112
113    // ── Clojure ──
114    ClojureDepsEdn,
115    ClojureProjectClj,
116
117    // ── CPAN/Perl ──
118    CpanDistIni,
119    /// Matches Python reference value.
120    #[serde(rename = "cpan_makefile")]
121    CpanMakefile,
122    CpanManifest,
123    CpanMetaJson,
124    CpanMetaYml,
125
126    // ── CRAN/R ──
127    CranDescription,
128
129    // ── Dart/Flutter ──
130    PubspecLock,
131    PubspecYaml,
132
133    // ── Debian ──
134    DebianControlExtractedDeb,
135    DebianControlInSource,
136    DebianCopyright,
137    DebianDeb,
138    /// Matches Python reference value.
139    #[serde(rename = "debian_source_metadata_tarball")]
140    DebianSourceMetadataTarball,
141    DebianDistrolessInstalledDb,
142    /// Matches Python reference value.
143    #[serde(rename = "debian_installed_files_list")]
144    DebianInstalledFilesList,
145    #[serde(rename = "debian_installed_md5sums")]
146    DebianInstalledMd5Sums,
147    DebianInstalledStatusDb,
148    #[serde(rename = "debian_md5sums_in_extracted_deb")]
149    DebianMd5SumsInExtractedDeb,
150    /// Matches Python reference value.
151    #[serde(rename = "debian_original_source_tarball")]
152    DebianOriginalSourceTarball,
153    DebianSourceControlDsc,
154
155    // ── Deno ──
156    DenoJson,
157    DenoLock,
158
159    // ── Docker ──
160    Dockerfile,
161
162    // ── FreeBSD ──
163    FreebsdCompactManifest,
164
165    // ── Go ──
166    Godeps,
167    GoMod,
168    GoModGraph,
169    GoSum,
170    GoWork,
171
172    // ── Haskell / Hackage ──
173    HackageCabal,
174    HackageCabalProject,
175    HackageStackYaml,
176
177    // ── Gradle ──
178    BuildGradle,
179    GradleLockfile,
180    GradleModule,
181
182    // ── Haxe ──
183    HaxelibJson,
184
185    // ── Helm ──
186    HelmChartLock,
187    HelmChartYaml,
188
189    // ── Hex/Elixir ──
190    HexMixLock,
191
192    // ── Java ──
193    AntIvyXml,
194    JavaEarApplicationXml,
195    JavaEarArchive,
196    JavaJar,
197    JavaJarManifest,
198    JavaOsgiManifest,
199    JavaWarArchive,
200    JavaWarWebXml,
201    JbossSar,
202    JbossServiceXml,
203
204    // ── Maven ──
205    MavenPom,
206    MavenPomProperties,
207    MesonBuild,
208
209    SbtBuildSbt,
210
211    // ── Microsoft ──
212    MicrosoftCabinet,
213    MicrosoftUpdateManifestMum,
214
215    // ── Mobile/Browser ──
216    AppleDmg,
217    ChromeCrx,
218    IosIpa,
219    MozillaXpi,
220
221    // ── Meteor ──
222    MeteorPackage,
223
224    NixDefaultNix,
225    NixFlakeLock,
226    NixFlakeNix,
227
228    // ── npm ──
229    NpmPackageJson,
230    NpmPackageLockJson,
231
232    // ── NuGet ──
233    NugetCsproj,
234    NugetDepsJson,
235    NugetDirectoryBuildProps,
236    NugetDirectoryPackagesProps,
237    NugetNupkg,
238    NugetProjectJson,
239    NugetProjectLockJson,
240    NugetPackagesConfig,
241    NugetPackagesLock,
242    /// Serializes to `"nuget_nupsec"` to match Python reference value (typo in original).
243    #[serde(rename = "nuget_nupsec")]
244    NugetNuspec,
245    NugetVbproj,
246    NugetFsproj,
247
248    // ── OCaml/opam ──
249    OpamFile,
250
251    // ── PHP/Composer ──
252    PhpComposerJson,
253    PhpComposerLock,
254
255    // ── pnpm ──
256    PnpmLockYaml,
257    PnpmWorkspaceYaml,
258
259    // ── Python/PyPI ──
260    Pipfile,
261    PipfileLock,
262    PipRequirements,
263    PixiLock,
264    PixiToml,
265    PypiPipOriginJson,
266    PypiEgg,
267    PypiInspectDeplock,
268    PypiJson,
269    PypiPoetryLock,
270    PypiPylockToml,
271    PypiPyprojectToml,
272    PypiSdistPkginfo,
273    PypiSetupCfg,
274    PypiSetupPy,
275    PypiUvLock,
276    PypiWheel,
277    PypiWheelMetadata,
278
279    // ── RPM ──
280    RpmArchive,
281    RpmInstalledDatabaseBdb,
282    RpmInstalledDatabaseNdb,
283    RpmInstalledDatabaseSqlite,
284    RpmMarinerManifest,
285    RpmPackageLicenses,
286    /// Serializes to `"rpm_spefile"` to match Python reference value (typo in original).
287    #[serde(rename = "rpm_spefile")]
288    RpmSpecfile,
289    RpmYumdb,
290
291    // ── Ruby/RubyGems ──
292    Gemfile,
293    GemfileLock,
294    GemArchive,
295    /// Matches Python reference value.
296    #[serde(rename = "gem_archive_extracted")]
297    GemArchiveExtracted,
298    Gemspec,
299
300    // ── Disk Images/Installers ──
301    InstallshieldInstaller,
302    IsoDiskImage,
303    NsisInstaller,
304    SharShellArchive,
305    SquashfsDiskImage,
306
307    // ── Swift ──
308    SwiftPackageManifestJson,
309    SwiftPackageResolved,
310    SwiftPackageShowDependencies,
311
312    // ── vcpkg ──
313    VcpkgJson,
314
315    // ── Yarn ──
316    YarnLock,
317
318    // ── Git ──
319    Gitmodules,
320}
321
322impl DatasourceId {
323    /// Returns the string representation of this datasource ID.
324    ///
325    /// This matches the serialized form used in JSON output.
326    pub fn as_str(&self) -> &'static str {
327        match self {
328            // About/README/OS
329            Self::AboutFile => "about_file",
330            Self::Readme => "readme",
331            Self::EtcOsRelease => "etc_os_release",
332
333            // Alpine
334            Self::AlpineApkArchive => "alpine_apk_archive",
335            Self::AlpineApkbuild => "alpine_apkbuild",
336            Self::AlpineInstalledDb => "alpine_installed_db",
337
338            // Arch Linux
339            Self::ArchAurinfo => "arch_aurinfo",
340            Self::ArchPkginfo => "arch_pkginfo",
341            Self::ArchSrcinfo => "arch_srcinfo",
342
343            // Android
344            Self::AndroidAarLibrary => "android_aar_library",
345            Self::AndroidApk => "android_apk",
346
347            // Apache Axis2
348            Self::Axis2Mar => "axis2_mar",
349            Self::Axis2ModuleXml => "axis2_module_xml",
350
351            // Autotools
352            Self::AutotoolsConfigure => "autotools_configure",
353
354            // Bazel
355            Self::BazelBuild => "bazel_build",
356
357            // Bower
358            Self::BowerJson => "bower_json",
359
360            // Buck
361            Self::BuckFile => "buck_file",
362            Self::BuckMetadata => "buck_metadata",
363
364            // Cargo/Rust
365            Self::CargoLock => "cargo_lock",
366            Self::CargoToml => "cargo_toml",
367
368            // Chef
369            Self::ChefCookbookMetadataJson => "chef_cookbook_metadata_json",
370            Self::ChefCookbookMetadataRb => "chef_cookbook_metadata_rb",
371
372            // CocoaPods
373            Self::CocoapodsPodfile => "cocoapods_podfile",
374            Self::CocoapodsPodfileLock => "cocoapods_podfile_lock",
375            Self::CocoapodsPodspec => "cocoapods_podspec",
376            Self::CocoapodsPodspecJson => "cocoapods_podspec_json",
377
378            // Conan
379            Self::ConanConanDataYml => "conan_conandata_yml",
380            Self::ConanConanFilePy => "conan_conanfile_py",
381            Self::ConanConanFileTxt => "conan_conanfile_txt",
382            Self::ConanLock => "conan_lock",
383
384            // Conda
385            Self::CondaYaml => "conda_yaml",
386            Self::CondaMetaJson => "conda_meta_json",
387            Self::CondaMetaYaml => "conda_meta_yaml",
388
389            // Clojure
390            Self::ClojureDepsEdn => "clojure_deps_edn",
391            Self::ClojureProjectClj => "clojure_project_clj",
392
393            // CPAN/Perl
394            Self::CpanDistIni => "cpan_dist_ini",
395            Self::CpanMakefile => "cpan_makefile",
396            Self::CpanManifest => "cpan_manifest",
397            Self::CpanMetaJson => "cpan_meta_json",
398            Self::CpanMetaYml => "cpan_meta_yml",
399
400            // CRAN/R
401            Self::CranDescription => "cran_description",
402
403            // Dart/Flutter
404            Self::PubspecLock => "pubspec_lock",
405            Self::PubspecYaml => "pubspec_yaml",
406
407            // Debian
408            Self::DebianControlExtractedDeb => "debian_control_extracted_deb",
409            Self::DebianControlInSource => "debian_control_in_source",
410            Self::DebianCopyright => "debian_copyright",
411            Self::DebianDeb => "debian_deb",
412            Self::DebianSourceMetadataTarball => "debian_source_metadata_tarball",
413            Self::DebianDistrolessInstalledDb => "debian_distroless_installed_db",
414            Self::DebianInstalledFilesList => "debian_installed_files_list",
415            Self::DebianInstalledMd5Sums => "debian_installed_md5sums",
416            Self::DebianInstalledStatusDb => "debian_installed_status_db",
417            Self::DebianMd5SumsInExtractedDeb => "debian_md5sums_in_extracted_deb",
418            Self::DebianOriginalSourceTarball => "debian_original_source_tarball",
419            Self::DebianSourceControlDsc => "debian_source_control_dsc",
420            Self::DenoJson => "deno_json",
421            Self::DenoLock => "deno_lock",
422            Self::Dockerfile => "dockerfile",
423            Self::BazelModule => "bazel_module",
424
425            // FreeBSD
426            Self::FreebsdCompactManifest => "freebsd_compact_manifest",
427
428            // Go
429            Self::Godeps => "godeps",
430            Self::GoMod => "go_mod",
431            Self::GoModGraph => "go_mod_graph",
432            Self::GoSum => "go_sum",
433            Self::GoWork => "go_work",
434
435            // Haskell / Hackage
436            Self::HackageCabal => "hackage_cabal",
437            Self::HackageCabalProject => "hackage_cabal_project",
438            Self::HackageStackYaml => "hackage_stack_yaml",
439
440            // Gradle
441            Self::BuildGradle => "build_gradle",
442            Self::GradleLockfile => "gradle_lockfile",
443            Self::GradleModule => "gradle_module",
444
445            // Haxe
446            Self::HaxelibJson => "haxelib_json",
447
448            // Helm
449            Self::HelmChartLock => "helm_chart_lock",
450            Self::HelmChartYaml => "helm_chart_yaml",
451
452            // Hex/Elixir
453            Self::HexMixLock => "hex_mix_lock",
454
455            // Java
456            Self::AntIvyXml => "ant_ivy_xml",
457            Self::JavaEarApplicationXml => "java_ear_application_xml",
458            Self::JavaEarArchive => "java_ear_archive",
459            Self::JavaJar => "java_jar",
460            Self::JavaJarManifest => "java_jar_manifest",
461            Self::JavaOsgiManifest => "java_osgi_manifest",
462            Self::JavaWarArchive => "java_war_archive",
463            Self::JavaWarWebXml => "java_war_web_xml",
464            Self::JbossSar => "jboss_sar",
465            Self::JbossServiceXml => "jboss_service_xml",
466
467            // Maven
468            Self::MavenPom => "maven_pom",
469            Self::MavenPomProperties => "maven_pom_properties",
470            Self::MesonBuild => "meson_build",
471            Self::SbtBuildSbt => "sbt_build_sbt",
472
473            // Microsoft
474            Self::MicrosoftCabinet => "microsoft_cabinet",
475            Self::MicrosoftUpdateManifestMum => "microsoft_update_manifest_mum",
476
477            // Mobile/Browser
478            Self::AppleDmg => "apple_dmg",
479            Self::ChromeCrx => "chrome_crx",
480            Self::IosIpa => "ios_ipa",
481            Self::MozillaXpi => "mozilla_xpi",
482
483            // Meteor
484            Self::MeteorPackage => "meteor_package",
485
486            Self::NixDefaultNix => "nix_default_nix",
487            Self::NixFlakeLock => "nix_flake_lock",
488            Self::NixFlakeNix => "nix_flake_nix",
489
490            // npm
491            Self::BunLock => "bun_lock",
492            Self::BunLockb => "bun_lockb",
493            Self::NpmPackageJson => "npm_package_json",
494            Self::NpmPackageLockJson => "npm_package_lock_json",
495
496            // NuGet
497            Self::NugetCsproj => "nuget_csproj",
498            Self::NugetDepsJson => "nuget_deps_json",
499            Self::NugetDirectoryBuildProps => "nuget_directory_build_props",
500            Self::NugetDirectoryPackagesProps => "nuget_directory_packages_props",
501            Self::NugetNupkg => "nuget_nupkg",
502            Self::NugetProjectJson => "nuget_project_json",
503            Self::NugetProjectLockJson => "nuget_project_lock_json",
504            Self::NugetPackagesConfig => "nuget_packages_config",
505            Self::NugetPackagesLock => "nuget_packages_lock",
506            Self::NugetNuspec => "nuget_nupsec",
507            Self::NugetVbproj => "nuget_vbproj",
508            Self::NugetFsproj => "nuget_fsproj",
509
510            // OCaml/opam
511            Self::OpamFile => "opam_file",
512
513            // PHP/Composer
514            Self::PhpComposerJson => "php_composer_json",
515            Self::PhpComposerLock => "php_composer_lock",
516
517            // pnpm
518            Self::PnpmLockYaml => "pnpm_lock_yaml",
519            Self::PnpmWorkspaceYaml => "pnpm_workspace_yaml",
520
521            // Python/PyPI
522            Self::Pipfile => "pipfile",
523            Self::PipfileLock => "pipfile_lock",
524            Self::PipRequirements => "pip_requirements",
525            Self::PixiLock => "pixi_lock",
526            Self::PixiToml => "pixi_toml",
527            Self::PypiPipOriginJson => "pypi_pip_origin_json",
528            Self::PypiEgg => "pypi_egg",
529            Self::PypiInspectDeplock => "pypi_inspect_deplock",
530            Self::PypiJson => "pypi_json",
531            Self::PypiPoetryLock => "pypi_poetry_lock",
532            Self::PypiPylockToml => "pypi_pylock_toml",
533            Self::PypiPyprojectToml => "pypi_pyproject_toml",
534            Self::PypiSdistPkginfo => "pypi_sdist_pkginfo",
535            Self::PypiSetupCfg => "pypi_setup_cfg",
536            Self::PypiSetupPy => "pypi_setup_py",
537            Self::PypiUvLock => "pypi_uv_lock",
538            Self::PypiWheel => "pypi_wheel",
539            Self::PypiWheelMetadata => "pypi_wheel_metadata",
540
541            // RPM
542            Self::RpmArchive => "rpm_archive",
543            Self::RpmInstalledDatabaseBdb => "rpm_installed_database_bdb",
544            Self::RpmInstalledDatabaseNdb => "rpm_installed_database_ndb",
545            Self::RpmInstalledDatabaseSqlite => "rpm_installed_database_sqlite",
546            Self::RpmMarinerManifest => "rpm_mariner_manifest",
547            Self::RpmPackageLicenses => "rpm_package_licenses",
548            Self::RpmSpecfile => "rpm_spefile",
549            Self::RpmYumdb => "rpm_yumdb",
550
551            // Ruby/RubyGems
552            Self::Gemfile => "gemfile",
553            Self::GemfileLock => "gemfile_lock",
554            Self::GemArchive => "gem_archive",
555            Self::GemArchiveExtracted => "gem_archive_extracted",
556            Self::Gemspec => "gemspec",
557
558            // Disk Images/Installers
559            Self::InstallshieldInstaller => "installshield_installer",
560            Self::IsoDiskImage => "iso_disk_image",
561            Self::NsisInstaller => "nsis_installer",
562            Self::SharShellArchive => "shar_shell_archive",
563            Self::SquashfsDiskImage => "squashfs_disk_image",
564
565            // Swift
566            Self::SwiftPackageManifestJson => "swift_package_manifest_json",
567            Self::SwiftPackageResolved => "swift_package_resolved",
568            Self::SwiftPackageShowDependencies => "swift_package_show_dependencies",
569
570            // vcpkg
571            Self::VcpkgJson => "vcpkg_json",
572
573            // Yarn
574            Self::YarnLock => "yarn_lock",
575
576            // Git
577            Self::Gitmodules => "gitmodules",
578        }
579    }
580}
581
582impl AsRef<str> for DatasourceId {
583    fn as_ref(&self) -> &str {
584        self.as_str()
585    }
586}
587
588impl fmt::Display for DatasourceId {
589    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
590        f.write_str(self.as_str())
591    }
592}
593
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::{EnumCount, IntoEnumIterator};

    /// A single variant serializes to its snake_case JSON string.
    #[test]
    fn test_serialization() {
        let id = DatasourceId::NpmPackageJson;
        let json = serde_json::to_string(&id).unwrap();
        assert_eq!(json, r#""npm_package_json""#);
    }

    /// The snake_case JSON string deserializes back to the variant.
    #[test]
    fn test_deserialization() {
        let json = r#""npm_package_json""#;
        let id: DatasourceId = serde_json::from_str(json).unwrap();
        assert_eq!(id, DatasourceId::NpmPackageJson);
    }

    #[test]
    fn test_as_str() {
        assert_eq!(DatasourceId::NpmPackageJson.as_str(), "npm_package_json");
        assert_eq!(DatasourceId::CargoLock.as_str(), "cargo_lock");
        assert_eq!(
            DatasourceId::PypiPyprojectToml.as_str(),
            "pypi_pyproject_toml"
        );
        assert_eq!(DatasourceId::HackageCabal.as_str(), "hackage_cabal");
    }

    #[test]
    fn test_display() {
        assert_eq!(DatasourceId::NpmPackageJson.to_string(), "npm_package_json");
    }

    #[test]
    fn test_as_ref() {
        let id = DatasourceId::NpmPackageJson;
        let s: &str = id.as_ref();
        assert_eq!(s, "npm_package_json");
    }

    #[test]
    fn test_python_rename_mappings() {
        // Test the ~12 IDs that changed from our old values to match Python
        assert_eq!(DatasourceId::BuckFile.as_str(), "buck_file");
        assert_eq!(DatasourceId::BuckMetadata.as_str(), "buck_metadata");
        assert_eq!(
            DatasourceId::ChefCookbookMetadataJson.as_str(),
            "chef_cookbook_metadata_json"
        );
        assert_eq!(
            DatasourceId::ChefCookbookMetadataRb.as_str(),
            "chef_cookbook_metadata_rb"
        );
        assert_eq!(DatasourceId::CondaYaml.as_str(), "conda_yaml");
        assert_eq!(DatasourceId::CpanMakefile.as_str(), "cpan_makefile");
        assert_eq!(
            DatasourceId::DebianInstalledFilesList.as_str(),
            "debian_installed_files_list"
        );
        assert_eq!(
            DatasourceId::DebianOriginalSourceTarball.as_str(),
            "debian_original_source_tarball"
        );
        assert_eq!(
            DatasourceId::DebianSourceMetadataTarball.as_str(),
            "debian_source_metadata_tarball"
        );
        assert_eq!(
            DatasourceId::GemArchiveExtracted.as_str(),
            "gem_archive_extracted"
        );
        assert_eq!(DatasourceId::NugetNuspec.as_str(), "nuget_nupsec");
        assert_eq!(DatasourceId::RpmSpecfile.as_str(), "rpm_spefile");
    }

    /// `as_str` is a hand-written table that must stay in lock-step with the
    /// serde names. Check every variant, not just a sample, and make sure the
    /// JSON form round-trips back to the same variant.
    #[test]
    fn test_as_str_matches_serde_for_every_variant() {
        for id in DatasourceId::iter() {
            let json = serde_json::to_string(&id).unwrap();
            assert_eq!(
                json,
                format!("\"{}\"", id.as_str()),
                "as_str and serde disagree for {:?}",
                id
            );

            let parsed: DatasourceId = serde_json::from_str(&json).unwrap();
            assert_eq!(parsed, id, "round-trip failed for {:?}", id);
        }
    }

    /// Two variants sharing a string would make the IDs ambiguous.
    #[test]
    fn test_as_str_values_are_unique() {
        let names: HashSet<&'static str> = DatasourceId::iter().map(|id| id.as_str()).collect();
        assert_eq!(names.len(), DatasourceId::COUNT);
    }
}