use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};
use std::{
fmt::{Display, Formatter},
str::FromStr,
};
use rattler_conda_types::{PackageName, PackageRecord};
use serde_with::{DeserializeFromStr, SerializeDisplay};
use thiserror::Error;
use crate::{CondaSourceData, SourceMetadata, UrlOrPath};
/// Hashing interface that can be used through a trait object.
///
/// [`Hash::hash`] is generic over the hasher type, so values of different
/// types cannot be stored together as `&dyn Hash`. This trait takes the hasher
/// as `&mut dyn Hasher` instead, which allows `&dyn DynHash` values to be
/// collected in a single container.
trait DynHash {
/// Feeds `self` into `state`.
fn dyn_hash(&self, state: &mut dyn Hasher);
}
/// Blanket implementation: every [`Hash`] type is also [`DynHash`].
impl<T: Hash> DynHash for T {
    fn dyn_hash(&self, state: &mut dyn Hasher) {
        // Wrap the trait object in the sized `HasherMut` adapter and hand
        // that to the regular `Hash` implementation of `T`.
        let mut adapter = HasherMut(state);
        Hash::hash(self, &mut adapter);
    }
}
/// Sized wrapper around a borrowed `dyn Hasher`.
///
/// Only `write` and `finish` are forwarded to the wrapped hasher; every other
/// `Hasher` method uses the trait's default implementation.
struct HasherMut<'a>(&'a mut dyn Hasher);

impl Hasher for HasherMut<'_> {
    /// Returns the current hash value of the wrapped hasher.
    fn finish(&self) -> u64 {
        Hasher::finish(&*self.0)
    }

    /// Passes `bytes` on to the wrapped hasher unchanged.
    fn write(&mut self, bytes: &[u8]) {
        Hasher::write(&mut *self.0, bytes);
    }
}
/// Feeds every `(key, value)` pair of `fields` into `hasher`.
///
/// Pairs are visited in the map's iteration order (ascending by key for a
/// `BTreeMap`), with the key hashed before its value.
fn hash_fields(fields: &BTreeMap<&str, &dyn DynHash>, hasher: &mut impl Hasher) {
    for (&field_name, &field_value) in fields {
        field_name.hash(hasher);
        field_value.dyn_hash(hasher);
    }
}
/// Identifies a source package by name, hash and location.
///
/// The textual form is `name[hash] @ location`: it is produced by the
/// `Display` impl and parsed back by the `FromStr` impl. The derived
/// `SerializeDisplay` / `DeserializeFromStr` make serde use that same form.
#[derive(Debug, Clone, Eq, PartialEq, Hash, SerializeDisplay, DeserializeFromStr)]
pub struct SourceIdentifier {
/// Name of the package.
name: PackageName,
/// Hash component, stored exactly as supplied or parsed.
hash: String,
/// Location component.
location: UrlOrPath,
}
/// Upper bound on the number of hex characters that `format_short_hash` keeps
/// from a computed hash.
const SHORT_HASH_LENGTH: usize = 8;
impl SourceIdentifier {
    /// Builds an identifier from its three components.
    ///
    /// `hash` is stored as given; it is neither validated nor shortened.
    pub fn new(name: PackageName, hash: impl Into<String>, location: UrlOrPath) -> Self {
        let hash = hash.into();
        Self {
            name,
            hash,
            location,
        }
    }

    /// Derives the identifier for `source_data`.
    ///
    /// If the source data carries an explicit `identifier_hash`, that value is
    /// used as-is. Otherwise the hash is computed from the source data and
    /// shortened.
    pub fn from_source_data(source_data: &CondaSourceData) -> Self {
        let hash = match &source_data.identifier_hash {
            Some(stored) => stored.clone(),
            None => format_short_hash(compute_source_hash(source_data)),
        };
        Self::new(
            source_data.name().clone(),
            hash,
            source_data.location.clone(),
        )
    }

    /// The package name.
    pub fn name(&self) -> &PackageName {
        &self.name
    }

    /// The hash component.
    pub fn hash(&self) -> &str {
        self.hash.as_str()
    }

    /// The location component.
    pub fn location(&self) -> &UrlOrPath {
        &self.location
    }

    /// Consumes the identifier and returns `(name, hash, location)`.
    pub fn into_parts(self) -> (PackageName, String, UrlOrPath) {
        let Self {
            name,
            hash,
            location,
        } = self;
        (name, hash, location)
    }
}
impl Display for SourceIdentifier {
    /// Writes the identifier as `name[hash] @ location`, using the source
    /// spelling of the package name.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            name,
            hash,
            location,
        } = self;
        write!(f, "{}[{}] @ {}", name.as_source(), hash, location)
    }
}
/// Errors produced when parsing a [`SourceIdentifier`] from its
/// `name[hash] @ location` string form.
#[derive(Debug, Error, Eq, PartialEq)]
pub enum ParseSourceIdentifierError {
/// The input contains no `[`.
#[error("missing '[' after package name")]
MissingOpenBracket,
/// The input contains no `]`, or its first `]` does not come after the
/// first `[`.
#[error("missing ']' after hash")]
MissingCloseBracket,
/// The text following `]` does not start with `" @ "`.
#[error("missing ' @ ' separator")]
MissingSeparator,
/// The text before `[` is not a valid package name.
#[error("invalid package name: {0}")]
InvalidPackageName(#[from] rattler_conda_types::InvalidPackageNameError),
/// The text after the separator could not be parsed as a location.
#[error("invalid location: {0}")]
InvalidLocation(#[from] crate::url_or_path::PathOrUrlError),
/// Nothing was found between `[` and `]`.
#[error("hash cannot be empty")]
EmptyHash,
}
/// Computes the 64-bit XXH3 hash that identifies `source_data`.
///
/// The selected fields are gathered into a map keyed by a fixed label and
/// hashed in key order, so the order in which they are inserted below does not
/// influence the result. `location` and `identifier_hash` never contribute.
/// Optional or collection-valued fields guarded by a check are left out
/// entirely when absent or empty.
fn compute_source_hash(source_data: &CondaSourceData) -> u64 {
    // Exhaustive destructuring: a field added to `CondaSourceData` must be
    // explicitly included or ignored here before this compiles again.
    let CondaSourceData {
        location: _,
        identifier_hash: _,
        package_build_source,
        variants,
        metadata,
        timestamp,
    } = source_data;

    let mut entries: BTreeMap<&str, &dyn DynHash> = BTreeMap::new();
    entries.insert("timestamp", timestamp);
    entries.insert("variants", variants);
    if let Some(build_source) = package_build_source {
        entries.insert("package_build_source", build_source);
    }

    match metadata {
        SourceMetadata::Partial(partial) => {
            if !partial.sources.is_empty() {
                entries.insert("sources", &partial.sources);
            }
            if !partial.depends.is_empty() {
                entries.insert("depends", &partial.depends);
            }
        }
        SourceMetadata::Full(full) => {
            // Same exhaustiveness guard for `PackageRecord`; only the bound
            // fields take part in the hash.
            let PackageRecord {
                build,
                build_number,
                constrains,
                depends,
                experimental_extra_depends,
                noarch,
                subdir,
                version,
                name: _,
                arch: _,
                platform: _,
                features: _,
                legacy_bz2_md5: _,
                legacy_bz2_size: _,
                license: _,
                license_family: _,
                md5: _,
                purls: _,
                python_site_packages_path: _,
                run_exports: _,
                sha256: _,
                size: _,
                timestamp: _,
                track_features: _,
            } = &full.package_record;

            // Always-present scalar fields.
            entries.insert("version", version);
            entries.insert("subdir", subdir);
            entries.insert("noarch", noarch);
            entries.insert("build_number", build_number);
            entries.insert("build", build);

            // Collections only contribute when they hold something.
            if !experimental_extra_depends.is_empty() {
                entries.insert("extra_depends", experimental_extra_depends);
            }
            if !full.sources.is_empty() {
                entries.insert("sources", &full.sources);
            }
            if !constrains.is_empty() {
                entries.insert("constrains", constrains);
            }
            if !depends.is_empty() {
                entries.insert("depends", depends);
            }
        }
    }

    let mut state = xxhash_rust::xxh3::Xxh3::default();
    hash_fields(&entries, &mut state);
    state.finish()
}
/// Renders `hash` as lowercase hex and keeps the first `SHORT_HASH_LENGTH`
/// characters.
///
/// The value is zero-padded to the full 16 hex digits of a `u64` before it is
/// shortened. Without the padding, a hash with leading zero nibbles would
/// yield a shifted prefix, and a small enough value would yield fewer than
/// `SHORT_HASH_LENGTH` characters.
fn format_short_hash(hash: u64) -> String {
    let mut hex = format!("{hash:016x}");
    // Hex digits are ASCII, so truncating at any byte offset is a valid char
    // boundary; a limit above 16 simply leaves the string untouched.
    hex.truncate(SHORT_HASH_LENGTH);
    hex
}
impl FromStr for SourceIdentifier {
    type Err = ParseSourceIdentifierError;

    /// Parses the `name[hash] @ location` form.
    ///
    /// The name is everything before the first `[`, and the hash is the text
    /// between that `[` and the first `]` in the input. The location is
    /// whatever follows the `" @ "` that must come directly after the `]`.
    ///
    /// # Errors
    ///
    /// Checks run in this order: missing `[`, missing or misplaced `]`,
    /// invalid package name, empty hash, missing separator, invalid location.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (open_idx, close_idx) = match (s.find('['), s.find(']')) {
            (None, _) => return Err(ParseSourceIdentifierError::MissingOpenBracket),
            (Some(open), Some(close)) if close > open => (open, close),
            (Some(_), _) => return Err(ParseSourceIdentifierError::MissingCloseBracket),
        };

        let name = PackageName::from_str(&s[..open_idx])?;

        let hash = match &s[open_idx + 1..close_idx] {
            "" => return Err(ParseSourceIdentifierError::EmptyHash),
            digest => digest.to_string(),
        };

        let location_str = match s[close_idx + 1..].strip_prefix(" @ ") {
            Some(rest) => rest,
            None => return Err(ParseSourceIdentifierError::MissingSeparator),
        };
        let location = UrlOrPath::from_str(location_str)?;

        Ok(Self {
            name,
            hash,
            location,
        })
    }
}
// Unit tests for parsing, formatting and hash derivation of `SourceIdentifier`,
// plus two checks of `CondaSourceData::into_full`.
#[cfg(test)]
mod tests {
use super::*;
// A bare `.` is accepted as the location.
#[test]
fn test_parse_relative_path() {
let id: SourceIdentifier = "numba-cuda[9f3c2a7b] @ .".parse().unwrap();
assert_eq!(id.name().as_source(), "numba-cuda");
assert_eq!(id.hash(), "9f3c2a7b");
assert_eq!(id.location().as_str(), ".");
}
// An https URL is accepted as the location.
#[test]
fn test_parse_url() {
let id: SourceIdentifier = "my-package[abcd1234] @ https://example.com/pkgs/source"
.parse()
.unwrap();
assert_eq!(id.name().as_source(), "my-package");
assert_eq!(id.hash(), "abcd1234");
assert_eq!(id.location().as_str(), "https://example.com/pkgs/source");
}
// A `git+https` location containing an `@` after the separator is kept intact.
#[test]
fn test_parse_git_url() {
let id: SourceIdentifier = "my-pkg[deadbeef] @ git+https://github.com/org/repo@main"
.parse()
.unwrap();
assert_eq!(id.name().as_source(), "my-pkg");
assert_eq!(id.hash(), "deadbeef");
assert_eq!(
id.location().as_str(),
"git+https://github.com/org/repo@main"
);
}
// Parsing followed by `to_string` reproduces the input (path location).
#[test]
fn test_display_roundtrip() {
let original = "numba-cuda[9f3c2a7b] @ .";
let id: SourceIdentifier = original.parse().unwrap();
assert_eq!(id.to_string(), original);
}
// Parsing followed by `to_string` reproduces the input (URL location).
#[test]
fn test_display_url_roundtrip() {
let original = "my-package[abcd1234] @ https://example.com/pkgs/source";
let id: SourceIdentifier = original.parse().unwrap();
assert_eq!(id.to_string(), original);
}
// Input without any `[` is rejected with `MissingOpenBracket`.
#[test]
fn test_missing_open_bracket() {
let result: Result<SourceIdentifier, _> = "numba-cuda9f3c2a7b] @ .".parse();
assert!(matches!(
result,
Err(ParseSourceIdentifierError::MissingOpenBracket)
));
}
// Input without any `]` is rejected with `MissingCloseBracket`.
#[test]
fn test_missing_close_bracket() {
let result: Result<SourceIdentifier, _> = "numba-cuda[9f3c2a7b @ .".parse();
assert!(matches!(
result,
Err(ParseSourceIdentifierError::MissingCloseBracket)
));
}
// Text after `]` that does not start with " @ " is rejected.
#[test]
fn test_missing_separator() {
let result: Result<SourceIdentifier, _> = "numba-cuda[9f3c2a7b].".parse();
assert!(matches!(
result,
Err(ParseSourceIdentifierError::MissingSeparator)
));
}
// `[]` with nothing in between is rejected with `EmptyHash`.
#[test]
fn test_empty_hash() {
let result: Result<SourceIdentifier, _> = "numba-cuda[] @ .".parse();
assert!(matches!(result, Err(ParseSourceIdentifierError::EmptyHash)));
}
// A name containing spaces surfaces the package-name error.
#[test]
fn test_invalid_package_name() {
let result: Result<SourceIdentifier, _> = "invalid name with spaces[hash] @ .".parse();
assert!(
matches!(
result,
Err(ParseSourceIdentifierError::InvalidPackageName(_))
),
"expected InvalidPackageName error, got: {result:?}"
);
}
// Serializing to YAML and deserializing again yields an equal identifier.
#[test]
fn test_serde_roundtrip() {
let id = SourceIdentifier::new(
PackageName::from_str("my-package").unwrap(),
"abcd1234",
UrlOrPath::from_str(".").unwrap(),
);
let serialized = serde_yaml::to_string(&id).unwrap();
let deserialized: SourceIdentifier = serde_yaml::from_str(&serialized).unwrap();
assert_eq!(id, deserialized);
}
// Full metadata: name and location are taken from the source data, the hash
// is 8 characters long, and deriving it twice gives the same value.
#[test]
fn test_from_source_data() {
use std::collections::BTreeMap;
use rattler_conda_types::{PackageRecord, VersionWithSource};
use crate::CondaSourceData;
let name = PackageName::from_str("numba-cuda").unwrap();
let mut package_record = PackageRecord::new(
name.clone(),
VersionWithSource::from_str("0.23.0").unwrap(),
"py310h3ca6f64_0".to_string(),
);
package_record.subdir = "linux-64".to_string();
let source_data = CondaSourceData::full(
UrlOrPath::from_str(".").unwrap(),
None,
BTreeMap::new(),
None,
None,
package_record,
BTreeMap::new(),
);
let id = SourceIdentifier::from_source_data(&source_data);
assert_eq!(id.name().as_source(), "numba-cuda");
assert_eq!(id.hash().len(), 8);
assert_eq!(id.location().as_str(), ".");
// Deriving the identifier a second time must give the same hash.
let id2 = SourceIdentifier::from_source_data(&source_data);
assert_eq!(id.hash(), id2.hash());
}
// Same as above but with a non-empty variants map.
#[test]
fn test_from_source_data_with_variants() {
use std::collections::BTreeMap;
use rattler_conda_types::{PackageRecord, VersionWithSource};
use crate::{CondaSourceData, VariantValue};
let name = PackageName::from_str("numba-cuda").unwrap();
let mut package_record = PackageRecord::new(
name.clone(),
VersionWithSource::from_str("0.23.0").unwrap(),
"py310h3ca6f64_0".to_string(),
);
package_record.subdir = "linux-aarch64".to_string();
let mut variants = BTreeMap::new();
variants.insert(
"python".to_string(),
VariantValue::String("3.10.*".to_string()),
);
variants.insert(
"target_platform".to_string(),
VariantValue::String("linux-aarch64".to_string()),
);
let source_data = CondaSourceData::full(
UrlOrPath::from_str(".").unwrap(),
None,
variants,
None,
None,
package_record,
BTreeMap::new(),
);
let id = SourceIdentifier::from_source_data(&source_data);
assert_eq!(id.name().as_source(), "numba-cuda");
assert_eq!(id.hash().len(), 8);
}
// Snapshots the rendered identifiers of a fixed set of packages; presumably
// this exists to flag unintended changes to the hash computation.
#[test]
fn compute_test_data_hashes() {
use std::collections::BTreeMap;
use rattler_conda_types::{PackageRecord, VersionWithSource};
use crate::CondaSourceData;
// Builds full source data with the given fields (everything else left at
// the values `PackageRecord::new` / `None` / empty maps provide) and
// returns the identifier's string form.
fn source_identifier(
name: &str,
version: &str,
build: &str,
subdir: &str,
location: &str,
) -> String {
let pkg_name = PackageName::from_str(name).unwrap();
let mut package_record = PackageRecord::new(
pkg_name,
VersionWithSource::from_str(version).unwrap(),
build.to_string(),
);
package_record.subdir = subdir.to_string();
let source_data = CondaSourceData::full(
UrlOrPath::from_str(location).unwrap(),
None,
BTreeMap::new(),
None,
None,
package_record,
BTreeMap::new(),
);
SourceIdentifier::from_source_data(&source_data).to_string()
}
let hashes = [
source_identifier(
"child-package",
"0.1.0",
"pyhbf21a9e_0",
"noarch",
"child-package",
),
source_identifier(
"minimal-project",
"0.1",
"first",
"linux-64",
"../minimal-project",
),
source_identifier(
"minimal-project",
"0.1",
"first",
"win-64",
"../minimal-project",
),
source_identifier(
"minimal-project",
"0.1",
"second",
"win-64",
"../minimal-project",
),
source_identifier(
"a-python-project",
"0.1",
"py38",
"noarch",
"../a-python-project",
),
source_identifier(
"b-python-project",
"0.1",
"h398123",
"noarch",
"../a-python-project",
),
source_identifier(
"pixi-build-package",
"1.0.0",
"pyhbf21a9e_0",
"noarch",
"pixi-build-package",
),
source_identifier(
"pixi-url-package",
"2.0.0",
"pyhbf21a9e_0",
"noarch",
"pixi-url-package",
),
source_identifier(
"pixi-tag-package",
"1.2.0",
"pyhbf21a9e_0",
"noarch",
"pixi-tag-package",
),
source_identifier(
"pixi-rev-package",
"0.5.0",
"pyhbf21a9e_0",
"noarch",
"pixi-rev-package",
),
];
insta::assert_yaml_snapshot!(hashes);
}
// `into_full` on partial source data yields `None`.
#[test]
fn test_into_full_returns_none_for_partial() {
use std::collections::BTreeMap;
use crate::CondaSourceData;
let name = PackageName::from_str("my-package").unwrap();
let partial = CondaSourceData::partial(
UrlOrPath::from_str(".").unwrap(),
None,
BTreeMap::new(),
None,
None,
name,
vec![],
BTreeMap::new(),
);
assert!(partial.into_full().is_none());
}
// `into_full` on full source data yields `Some` and keeps the package name.
#[test]
fn test_into_full_returns_some_for_full() {
use std::collections::BTreeMap;
use rattler_conda_types::{PackageRecord, VersionWithSource};
use crate::CondaSourceData;
let name = PackageName::from_str("my-package").unwrap();
let mut package_record = PackageRecord::new(
name.clone(),
VersionWithSource::from_str("1.0.0").unwrap(),
"h0000000_0".to_string(),
);
package_record.subdir = "linux-64".to_string();
let full = CondaSourceData::full(
UrlOrPath::from_str(".").unwrap(),
None,
BTreeMap::new(),
None,
None,
package_record,
BTreeMap::new(),
);
let converted = full.into_full();
assert!(converted.is_some());
assert_eq!(converted.unwrap().name().as_source(), "my-package");
}
// Partial metadata also yields a hash of `SHORT_HASH_LENGTH` characters that
// is the same on repeated derivation.
#[test]
fn test_partial_metadata_hash_computation() {
use std::collections::BTreeMap;
use crate::CondaSourceData;
let name = PackageName::from_str("my-package").unwrap();
let partial = CondaSourceData::partial(
UrlOrPath::from_str(".").unwrap(),
None,
BTreeMap::new(),
None,
None,
name,
vec!["dep-a".to_string()],
BTreeMap::new(),
);
let id = SourceIdentifier::from_source_data(&partial);
assert_eq!(id.name().as_source(), "my-package");
assert_eq!(id.hash().len(), SHORT_HASH_LENGTH);
let id2 = SourceIdentifier::from_source_data(&partial);
assert_eq!(id.hash(), id2.hash());
}
// Two source data values that differ only in the `python` variant value must
// produce different hashes while sharing the same name.
#[test]
fn test_different_variants_produce_different_hashes() {
use std::collections::BTreeMap;
use rattler_conda_types::{PackageRecord, VersionWithSource};
use crate::{CondaSourceData, VariantValue};
let name = PackageName::from_str("my-package").unwrap();
let mut package_record = PackageRecord::new(
name.clone(),
VersionWithSource::from_str("1.0.0").unwrap(),
"h0000000_0".to_string(),
);
package_record.subdir = "linux-64".to_string();
let mut variants1 = BTreeMap::new();
variants1.insert(
"python".to_string(),
VariantValue::String("3.10".to_string()),
);
let source_data1 = CondaSourceData::full(
UrlOrPath::from_str(".").unwrap(),
None,
variants1,
None,
None,
package_record.clone(),
BTreeMap::new(),
);
let mut variants2 = BTreeMap::new();
variants2.insert(
"python".to_string(),
VariantValue::String("3.11".to_string()),
);
let source_data2 = CondaSourceData::full(
UrlOrPath::from_str(".").unwrap(),
None,
variants2,
None,
None,
package_record,
BTreeMap::new(),
);
let id1 = SourceIdentifier::from_source_data(&source_data1);
let id2 = SourceIdentifier::from_source_data(&source_data2);
assert_eq!(id1.name(), id2.name());
assert_ne!(id1.hash(), id2.hash());
}
}