use crate::core::dependency::Dependency;
use crate::core::{InternedString, PackageId, SourceId, Summary};
use crate::sources::registry::{RegistryData, RegistryPackage};
use crate::util::paths;
use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver};
use log::info;
use semver::{Version, VersionReq};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use std::str;
struct UncanonicalizedIter<'s> {
input: &'s str,
num_hyphen_underscore: u32,
hyphen_combination_num: u16,
}
impl<'s> UncanonicalizedIter<'s> {
fn new(input: &'s str) -> Self {
let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32;
UncanonicalizedIter {
input,
num_hyphen_underscore,
hyphen_combination_num: 0,
}
}
}
impl<'s> Iterator for UncanonicalizedIter<'s> {
type Item = String;
fn next(&mut self) -> Option<Self::Item> {
if self.hyphen_combination_num > 0
&& self.hyphen_combination_num.trailing_zeros() >= self.num_hyphen_underscore
{
return None;
}
let ret = Some(
self.input
.chars()
.scan(0u16, |s, c| {
if (c == '_' || c == '-') && *s <= 15 {
let switch = (self.hyphen_combination_num & (1u16 << *s)) > 0;
let out = if (c == '_') ^ switch { '_' } else { '-' };
*s += 1;
Some(out)
} else {
Some(c)
}
})
.collect(),
);
self.hyphen_combination_num += 1;
ret
}
}
#[test]
fn no_hyphen() {
assert_eq!(
UncanonicalizedIter::new("test").collect::<Vec<_>>(),
vec!["test".to_string()]
)
}
#[test]
fn two_hyphen() {
assert_eq!(
UncanonicalizedIter::new("te-_st").collect::<Vec<_>>(),
vec![
"te-_st".to_string(),
"te__st".to_string(),
"te--st".to_string(),
"te_-st".to_string()
]
)
}
#[test]
fn overflow_hyphen() {
assert_eq!(
UncanonicalizedIter::new("te-_-_-_-_-_-_-_-_-st")
.take(100)
.count(),
100
)
}
pub struct RegistryIndex<'cfg> {
source_id: SourceId,
path: Filesystem,
summaries_cache: HashMap<InternedString, Summaries>,
config: &'cfg Config,
}
#[derive(Default)]
struct Summaries {
raw_data: Vec<u8>,
versions: HashMap<Version, MaybeIndexSummary>,
}
enum MaybeIndexSummary {
Unparsed { start: usize, end: usize },
Parsed(IndexSummary),
}
pub struct IndexSummary {
pub summary: Summary,
pub yanked: bool,
}
#[derive(Default)]
struct SummariesCache<'a> {
versions: Vec<(Version, &'a [u8])>,
}
impl<'cfg> RegistryIndex<'cfg> {
pub fn new(
source_id: SourceId,
path: &Filesystem,
config: &'cfg Config,
) -> RegistryIndex<'cfg> {
RegistryIndex {
source_id,
path: path.clone(),
summaries_cache: HashMap::new(),
config,
}
}
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<&str> {
let req = VersionReq::exact(pkg.version());
let summary = self
.summaries(pkg.name(), &req, load)?
.next()
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?;
summary
.summary
.checksum()
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))
}
pub fn summaries<'a, 'b>(
&'a mut self,
name: InternedString,
req: &'b VersionReq,
load: &mut dyn RegistryData,
) -> CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>
where
'a: 'b,
{
let source_id = self.source_id;
let summaries = self.load_summaries(name, load)?;
let raw_data = &summaries.raw_data;
Ok(summaries
.versions
.iter_mut()
.filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None })
.filter_map(move |maybe| match maybe.parse(raw_data, source_id) {
Ok(summary) => Some(summary),
Err(e) => {
info!("failed to parse `{}` registry package: {}", name, e);
None
}
}))
}
fn load_summaries(
&mut self,
name: InternedString,
load: &mut dyn RegistryData,
) -> CargoResult<&mut Summaries> {
if self.summaries_cache.contains_key(&name) {
return Ok(self.summaries_cache.get_mut(&name).unwrap());
}
load.prepare()?;
let root = load.assert_index_locked(&self.path);
let cache_root = root.join(".cache");
let index_version = load.current_version();
let fs_name = name
.chars()
.flat_map(|c| c.to_lowercase())
.collect::<String>();
let raw_path = match fs_name.len() {
1 => format!("1/{}", fs_name),
2 => format!("2/{}", fs_name),
3 => format!("3/{}/{}", &fs_name[..1], fs_name),
_ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
};
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
let summaries = Summaries::parse(
index_version.as_deref(),
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
)?;
if let Some(summaries) = summaries {
self.summaries_cache.insert(name, summaries);
return Ok(self.summaries_cache.get_mut(&name).unwrap());
}
}
self.summaries_cache.insert(name, Summaries::default());
Ok(self.summaries_cache.get_mut(&name).unwrap())
}
pub fn query_inner(
&mut self,
dep: &Dependency,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
) -> CargoResult<()> {
if self.config.offline()
&& self.query_inner_with_online(dep, load, yanked_whitelist, f, false)? != 0
{
return Ok(());
}
self.query_inner_with_online(dep, load, yanked_whitelist, f, true)?;
Ok(())
}
fn query_inner_with_online(
&mut self,
dep: &Dependency,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
online: bool,
) -> CargoResult<usize> {
let source_id = self.source_id;
let summaries = self
.summaries(dep.package_name(), dep.version_req(), load)?
.filter(|s| (online || load.is_crate_downloaded(s.summary.package_id())))
.filter(|s| !s.yanked || yanked_whitelist.contains(&s.summary.package_id()))
.map(|s| s.summary.clone());
let name = dep.package_name().as_str();
let summaries = summaries.filter(|s| match source_id.precise() {
Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
let mut vers = p[name.len() + 1..].splitn(2, "->");
if dep
.version_req()
.matches(&vers.next().unwrap().to_semver().unwrap())
{
vers.next().unwrap() == s.version().to_string()
} else {
true
}
}
_ => true,
});
let mut count = 0;
for summary in summaries {
f(summary);
count += 1;
}
Ok(count)
}
pub fn is_yanked(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<bool> {
let req = VersionReq::exact(pkg.version());
let found = self
.summaries(pkg.name(), &req, load)?
.any(|summary| summary.yanked);
Ok(found)
}
}
impl Summaries {
pub fn parse(
index_version: Option<&str>,
root: &Path,
cache_root: &Path,
relative: &Path,
source_id: SourceId,
load: &mut dyn RegistryData,
config: &Config,
) -> CargoResult<Option<Summaries>> {
let cache_path = cache_root.join(relative);
let mut cache_contents = None;
if let Some(index_version) = index_version {
match fs::read(&cache_path) {
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
Ok(s) => {
log::debug!("fast path for registry cache of {:?}", relative);
if cfg!(debug_assertions) {
cache_contents = Some(s.raw_data);
} else {
return Ok(Some(s));
}
}
Err(e) => {
log::debug!("failed to parse {:?} cache: {}", relative, e);
}
},
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
}
}
log::debug!("slow path for {:?}", relative);
let mut ret = Summaries::default();
let mut hit_closure = false;
let mut cache_bytes = None;
let err = load.load(root, relative, &mut |contents| {
ret.raw_data = contents.to_vec();
let mut cache = SummariesCache::default();
hit_closure = true;
for line in split(contents, b'\n') {
let summary = match IndexSummary::parse(line, source_id) {
Ok(summary) => summary,
Err(e) => {
log::info!("failed to parse {:?} registry package: {}", relative, e);
continue;
}
};
let version = summary.summary.package_id().version().clone();
cache.versions.push((version.clone(), line));
ret.versions.insert(version, summary.into());
}
if let Some(index_version) = index_version {
cache_bytes = Some(cache.serialize(index_version));
}
Ok(())
});
if !hit_closure {
debug_assert!(cache_contents.is_none());
return Ok(None);
}
err?;
if cfg!(debug_assertions) && cache_contents.is_some() {
assert_eq!(cache_bytes, cache_contents);
}
if let Some(cache_bytes) = cache_bytes {
if paths::create_dir_all(cache_path.parent().unwrap()).is_ok() {
let path = Filesystem::new(cache_path.clone());
config.assert_package_cache_locked(&path);
if let Err(e) = fs::write(cache_path, cache_bytes) {
log::info!("failed to write cache: {}", e);
}
}
}
Ok(Some(ret))
}
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
let cache = SummariesCache::parse(&contents, last_index_update)?;
let mut ret = Summaries::default();
for (version, summary) in cache.versions {
let (start, end) = subslice_bounds(&contents, summary);
ret.versions
.insert(version, MaybeIndexSummary::Unparsed { start, end });
}
ret.raw_data = contents;
return Ok(ret);
fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
let outer_start = outer.as_ptr() as usize;
let outer_end = outer_start + outer.len();
let inner_start = inner.as_ptr() as usize;
let inner_end = inner_start + inner.len();
assert!(inner_start >= outer_start);
assert!(inner_end <= outer_end);
(inner_start - outer_start, inner_end - outer_start)
}
}
}
const CURRENT_CACHE_VERSION: u8 = 1;
impl<'a> SummariesCache<'a> {
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
let (first_byte, rest) = data
.split_first()
.ok_or_else(|| anyhow::format_err!("malformed cache"))?;
if *first_byte != CURRENT_CACHE_VERSION {
anyhow::bail!("looks like a different Cargo's cache, bailing out");
}
let mut iter = split(rest, 0);
if let Some(update) = iter.next() {
if update != last_index_update.as_bytes() {
anyhow::bail!(
"cache out of date: current index ({}) != cache ({})",
last_index_update,
str::from_utf8(update)?,
)
}
} else {
anyhow::bail!("malformed file");
}
let mut ret = SummariesCache::default();
while let Some(version) = iter.next() {
let version = str::from_utf8(version)?;
let version = Version::parse(version)?;
let summary = iter.next().unwrap();
ret.versions.push((version, summary));
}
Ok(ret)
}
fn serialize(&self, index_version: &str) -> Vec<u8> {
let size = self
.versions
.iter()
.map(|(_version, data)| (10 + data.len()))
.sum();
let mut contents = Vec::with_capacity(size);
contents.push(CURRENT_CACHE_VERSION);
contents.extend_from_slice(index_version.as_bytes());
contents.push(0);
for (version, data) in self.versions.iter() {
contents.extend_from_slice(version.to_string().as_bytes());
contents.push(0);
contents.extend_from_slice(data);
contents.push(0);
}
contents
}
}
impl MaybeIndexSummary {
fn parse(&mut self, raw_data: &[u8], source_id: SourceId) -> CargoResult<&IndexSummary> {
let (start, end) = match self {
MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
MaybeIndexSummary::Parsed(summary) => return Ok(summary),
};
let summary = IndexSummary::parse(&raw_data[start..end], source_id)?;
*self = MaybeIndexSummary::Parsed(summary);
match self {
MaybeIndexSummary::Unparsed { .. } => unreachable!(),
MaybeIndexSummary::Parsed(summary) => Ok(summary),
}
}
}
impl From<IndexSummary> for MaybeIndexSummary {
fn from(summary: IndexSummary) -> MaybeIndexSummary {
MaybeIndexSummary::Parsed(summary)
}
}
impl IndexSummary {
fn parse(line: &[u8], source_id: SourceId) -> CargoResult<IndexSummary> {
let RegistryPackage {
name,
vers,
cksum,
deps,
features,
yanked,
links,
} = serde_json::from_slice(line)?;
log::trace!("json parsed registry {}/{}", name, vers);
let pkgid = PackageId::new(name, &vers, source_id)?;
let deps = deps
.into_iter()
.map(|dep| dep.into_dep(source_id))
.collect::<CargoResult<Vec<_>>>()?;
let namespaced_features = false;
let mut summary = Summary::new(pkgid, deps, &features, links, namespaced_features)?;
summary.set_checksum(cksum);
Ok(IndexSummary {
summary,
yanked: yanked.unwrap_or(false),
})
}
}
fn split<'a>(haystack: &'a [u8], needle: u8) -> impl Iterator<Item = &'a [u8]> + 'a {
struct Split<'a> {
haystack: &'a [u8],
needle: u8,
}
impl<'a> Iterator for Split<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<&'a [u8]> {
if self.haystack.is_empty() {
return None;
}
let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
None => (self.haystack, &[][..]),
};
self.haystack = remaining;
Some(ret)
}
}
Split { haystack, needle }
}