1#![doc = include_str!("../README.md")]
2use std::{
3 collections::{BTreeMap, HashMap, HashSet},
4 fs::File,
5 io::{BufRead, BufReader, Read},
6 ops::Deref,
7 path::{Path, PathBuf},
8 sync::{Arc, Mutex},
9};
10
11use lru_cache::LruCache;
12use ordered_parallel_iterator::OrderedParallelIterator;
13use rayon::prelude::*;
14
15mod cache;
16mod changeset;
17mod error;
18mod manifest;
19mod parser;
20mod path;
21mod revisionlog;
22mod types;
23
24use cache::{Cachable, Cache};
25use manifest::Manifest;
26use path::{fncache_fsencode, simple_fsencode, MPath, MPathElement};
27use revisionlog::RevisionLog;
28use types::{MercurialTag, NodeHash, RepositoryRequire};
29
30pub use changeset::*;
31pub use error::ErrorKind;
32pub use manifest::{FileType, ManifestEntry, ManifestEntryDetails};
33pub use types::{Revision, RevisionRange};
34
/// Options that tune how [`MercurialRepository::open_with_options`] opens a repository.
///
/// The default value has every flag turned off.
#[derive(Debug, Clone, Default)]
pub struct MercurialRepositoryOptions {
    /// When `true`, entries of `.hg/requires` that are reported as
    /// `ErrorKind::UnknownRequirement` are kept instead of failing the open.
    pub ignore_unknown_requirements: bool,
}
41
/// Handle to a Mercurial repository stored on disk.
///
/// Created with [`MercurialRepository::open`] or
/// [`MercurialRepository::open_with_options`].
#[derive(Debug)]
pub struct MercurialRepository {
    /// Repository root, i.e. the directory that contains `.hg`.
    root_path: PathBuf,
    /// Revision log opened from `.hg/store/00changelog.i`.
    changelog: RevisionLog,
    /// Revision log opened from `.hg/store/00manifest.i`.
    manifest: RevisionLog,
    /// Requirements parsed from `.hg/requires`; consulted when encoding store paths.
    requires: HashSet<RepositoryRequire>,
}
50
51impl MercurialRepository {
52 pub fn open<P: AsRef<Path>>(root_path: P) -> Result<MercurialRepository, ErrorKind> {
54 Self::open_with_options(root_path, Default::default())
55 }
56
57 pub fn open_with_options<P: AsRef<Path>>(
59 root_path: P,
60 options: MercurialRepositoryOptions,
61 ) -> Result<MercurialRepository, ErrorKind> {
62 let base = root_path.as_ref().join(".hg");
63
64 let requires = MercurialRepository::load_requires(&base, &options)?;
65
66 let store = base.join("store");
67
68 let changelog = RevisionLog::init(store.join("00changelog.i"), None)?;
69 let manifest = RevisionLog::init(store.join("00manifest.i"), None)?;
70
71 Ok(MercurialRepository {
72 root_path: root_path.as_ref().into(),
73 changelog,
74 manifest,
75 requires,
76 })
77 }
78
    /// Returns the last revision, as reported by the changelog revision log.
    pub fn last_rev(&self) -> Revision {
        self.changelog.last_rev()
    }
83
84 pub fn iter(&self) -> ChangesetIter {
86 self.into_iter()
87 }
88
89 pub fn header_iter(&self) -> ChangesetHeaderIter {
91 self.range_header_iter(Revision::from(0).range_to(self.last_rev()))
92 }
93
94 pub fn range_iter<RR: Into<RevisionRange>>(&self, revisions_range: RR) -> ChangesetIter {
96 ChangesetIter {
97 repository: self,
98 revisions_range: revisions_range.into(),
99 heads: Mutex::new(LruCache::new(1 << 4)),
100 files: Mutex::new(LruCache::new(1 << 12)),
101 cache: Cache::new(1 << 13),
102 }
103 }
104
105 pub fn range_header_iter<RR: Into<RevisionRange>>(
107 &self,
108 revisions_range: RR,
109 ) -> ChangesetHeaderIter {
110 ChangesetHeaderIter {
111 repository: self,
112 revisions_range: revisions_range.into(),
113 cache: Cache::new(1 << 13),
114 }
115 }
116
117 pub fn tags(&self) -> Result<BTreeMap<Revision, MercurialTag>, ErrorKind> {
119 let mut tags_path = self
120 .root_path
121 .join(".hg")
122 .join("cache")
123 .join("tags2-visible");
124 if !tags_path.exists() {
125 tags_path = self.root_path.join(".hgtags");
126 }
127 let file = File::open(tags_path)?;
128
129 let mut names = HashMap::new();
130 for line in BufReader::new(file).lines() {
131 let tag: Result<MercurialTag, _> = line.unwrap().parse();
132 if let Ok(tag) = tag {
133 if let Some(rev) = self.changelog.info_revision_by_node(&tag.node).cloned() {
134 names.insert(tag.name.clone(), (rev, tag));
135 } else {
136 names.remove(&tag.name);
137 }
138 }
139 }
140 Ok(names.into_values().collect())
141 }
142
143 pub(crate) fn get_manifest(&self, revision: Revision, cache: &Cache) -> Manifest {
144 let data = self.changelog.get_revision(revision, cache).unwrap();
145 let mut lines = data.splitn(2, |&x| x == b'\n');
146 let manifestid: NodeHash = lines
147 .next()
148 .and_then(|x| std::str::from_utf8(x).ok())
149 .and_then(|x| x.parse().ok())
150 .unwrap();
151 self.manifest
152 .get_entry_by_nodeid(&manifestid)
153 .and_then(|x| self.manifest.get_revision_from_entry(x, cache).ok())
154 .map(Manifest::from)
155 .unwrap()
156 }
157
158 fn load_requires<P: AsRef<Path>>(
159 path: P,
160 options: &MercurialRepositoryOptions,
161 ) -> Result<HashSet<RepositoryRequire>, ErrorKind> {
162 let requires_path = path.as_ref().join("requires");
163 let file = File::open(requires_path)?;
164 let lines = BufReader::new(file).lines().map_while(Result::ok);
165 if options.ignore_unknown_requirements {
166 lines
167 .map(|x| match x.parse() {
168 Err(ErrorKind::UnknownRequirement(r)) => Ok(r),
169 other => other,
170 })
171 .collect()
172 } else {
173 Ok(lines
174 .map(|x| x.parse().expect("could not parse requirement"))
175 .collect())
176 }
177 }
178
179 fn fsencode_path(&self, elements: &[MPathElement]) -> PathBuf {
180 if self.requires.contains(&RepositoryRequire::Store) {
185 if self.requires.contains(&RepositoryRequire::FnCache) {
186 let dotencode = self.requires.contains(&RepositoryRequire::DotEncode);
187 fncache_fsencode(elements, dotencode)
188 } else {
189 simple_fsencode(elements)
190 }
191 } else {
192 unimplemented!();
193 }
194 }
195
196 fn changeset_header(&self, cache: &Cache, revision: Revision) -> Option<ChangesetHeader> {
197 self.changelog.get_entry_by_revision(revision).map(|entry| {
198 let data = self
199 .changelog
200 .get_revision_from_entry(entry, cache)
201 .unwrap_or_else(|_| {
202 panic!(
203 "cannot get revision {:?} from changelog of {:?}",
204 revision, &self.root_path
205 )
206 });
207 ChangesetHeader::from_entry_bytes(entry, &data).unwrap()
208 })
209 }
210
211 fn changeset(
212 &self,
213 heads: &Mutex<LruCache<Revision, Arc<Manifest>>>,
214 files_cache: &Mutex<LruCache<Vec<u8>, Arc<RevisionLog>>>,
215 cache: &Cache,
216 revision: Revision,
217 ) -> Option<Changeset> {
218 if let Some(entry) = self.changelog.get_entry_by_revision(revision) {
219 let path = &self.root_path;
222 let data = self
223 .changelog
224 .get_revision_from_entry(entry, cache)
225 .unwrap_or_else(|_| {
226 panic!(
227 "cannot get revision {:?} from changelog of {:?}",
228 revision, path
229 )
230 });
231 let changeset_header = ChangesetHeader::from_entry_bytes(entry, &data).unwrap();
232 if let Some(manifest_entry) = self
233 .manifest
234 .get_entry_by_nodeid(&changeset_header.manifestid)
235 .or_else(|| self.manifest.get_entry_by_revision(revision))
236 {
237 let data = self
238 .manifest
239 .get_revision_from_entry(manifest_entry, cache)
240 .unwrap_or_else(|_| {
241 panic!(
242 "cannot get revision {:?} from manifest of {:?}",
243 revision, path
244 )
245 });
246 let manifest = Manifest::from(data);
247
248 let mut files = Vec::with_capacity(manifest.files.len() * 2);
249 let files = if let (Some(p1), Some(p2)) = (changeset_header.p1, changeset_header.p2)
250 {
251 let mut heads = heads.lock().unwrap();
252 if !heads.contains_key(&p1) {
253 heads.insert(p1, Arc::new(self.get_manifest(p1, cache)));
254 }
255 if !heads.contains_key(&p2) {
256 heads.insert(p2, Arc::new(self.get_manifest(p2, cache)));
257 }
258
259 let p1 = heads.get_mut(&p1).cloned().unwrap();
260 let p2 = heads.get_mut(&p2).cloned().unwrap();
261
262 split_dict(&manifest, &p1, &mut files);
263 split_dict(&manifest, &p2, &mut files);
264
265 files.sort();
266 files.dedup();
267
268 &files
269 } else {
270 &changeset_header.files
271 };
272
273 let files: Vec<_> = files
274 .par_iter()
275 .map(|file| {
276 let file = file.as_slice();
277 let manifest_entry = manifest.files.get(file);
278 let data = manifest_entry.and_then(|manifest_entry| {
279 Self::file_revlog(self, files_cache, file)
280 .get_revision_by_nodeid(&manifest_entry.id, cache)
281 });
282
283 ChangesetFile {
284 path: file.into(),
285 data,
286 manifest_entry: manifest_entry.cloned(),
287 }
288 })
289 .collect();
290 heads
291 .lock()
292 .as_mut()
293 .map(|x| {
294 changeset_header.p1.map(|h1| x.remove(&h1));
295 changeset_header.p2.map(|h2| x.remove(&h2));
296 x.insert(revision, Arc::new(manifest));
297 })
298 .unwrap();
299 let changeset = Changeset {
300 revision,
301 header: changeset_header,
302 files,
303 };
304 Some(changeset)
305 } else {
306 None
307 }
308 } else {
309 None
311 }
312 }
313
314 fn file_revlog(
315 repository: &MercurialRepository,
316 files: &Mutex<LruCache<Vec<u8>, Arc<RevisionLog>>>,
317 file: &[u8],
318 ) -> Arc<RevisionLog> {
319 let mut file_revlog = files.lock().unwrap().get_mut(file).cloned();
320
321 if file_revlog.is_none() {
322 let filerevlog = Arc::new(Self::init_file_revlog(repository, file));
323 files
324 .lock()
325 .unwrap()
326 .insert(file.into(), filerevlog.clone());
327 assert!(files.lock().unwrap().get_mut(file).is_some());
328 file_revlog = Some(filerevlog);
329 }
330
331 file_revlog.unwrap()
332 }
333
334 fn init_file_revlog(repository: &MercurialRepository, file: &[u8]) -> RevisionLog {
335 let root_path = &repository.root_path;
336 let path = MPath::from(file);
337 let path = MPath::new("data")
338 .unwrap()
339 .join(MPath::iter_opt(Some(&path)));
340
341 let mut elements: Vec<MPathElement> = path.into_iter().collect();
342 let mut basename = elements.pop().unwrap();
343
344 let index_path = {
345 let mut basename = basename.clone();
346 basename.extend(b".i");
347 elements.push(basename);
348 repository.fsencode_path(&elements)
349 };
350 elements.pop();
351
352 let data_path = {
353 basename.extend(b".d");
354 elements.push(basename);
355 repository.fsencode_path(&elements)
356 };
357
358 let store = root_path.join(".hg").join("store");
359 match RevisionLog::init(store.join(index_path), Some(store.join(data_path))) {
360 Err(ErrorKind::InvalidPath(info)) => Err(ErrorKind::InvalidPath(format!(
361 "Cannot load revision log for {:?}: {}",
362 std::str::from_utf8(file),
363 info
364 ))),
365 other => other,
366 }
367 .unwrap()
368 }
369}
370
371impl<'a> IntoIterator for &'a MercurialRepository {
372 type Item = Changeset;
373 type IntoIter = ChangesetIter<'a>;
374
375 fn into_iter(self) -> Self::IntoIter {
376 self.range_iter(Revision::from(0).range_to(self.last_rev()))
377 }
378}
379
/// A [`MercurialRepository`] bundled with the caches used while building changesets.
pub struct CachedMercurialRepository {
    /// The wrapped repository.
    repository: MercurialRepository,
    /// LRU cache of manifests, keyed by revision.
    heads: Mutex<LruCache<Revision, Arc<Manifest>>>,
    /// LRU cache of per-file revision logs, keyed by the file path bytes.
    files: Mutex<LruCache<Vec<u8>, Arc<RevisionLog>>>,
    /// Cache handed to revision-log reads.
    cache: Cache,
}
387
388impl From<MercurialRepository> for CachedMercurialRepository {
389 fn from(repository: MercurialRepository) -> Self {
390 Self {
391 repository,
392 heads: Mutex::new(LruCache::new(1 << 4)),
393 files: Mutex::new(LruCache::new(1 << 12)),
394 cache: Cache::new(1 << 13),
395 }
396 }
397}
398
/// Reference-counted handle to a [`CachedMercurialRepository`].
///
/// Dereferences to the wrapped [`MercurialRepository`].
pub struct SharedMercurialRepository {
    /// The shared repository and its caches.
    inner: Arc<CachedMercurialRepository>,
}
403
404impl SharedMercurialRepository {
405 pub fn new(repository: MercurialRepository) -> Self {
406 Self {
407 inner: Arc::new(repository.into()),
408 }
409 }
410}
411
412impl Deref for SharedMercurialRepository {
413 type Target = MercurialRepository;
414
415 #[inline]
416 fn deref(&self) -> &MercurialRepository {
417 &self.inner.repository
418 }
419}
420
421impl SharedMercurialRepository {
422 pub fn par_range_iter(
423 &self,
424 revision_range: RevisionRange,
425 ) -> OrderedParallelIterator<Changeset> {
426 let cached_repository = self.inner.clone();
427 let xform_ctor = move || {
428 let cached_repository = cached_repository.clone();
429 move |x: Revision| {
430 let repository = &cached_repository.repository;
431 repository
432 .changeset(
433 &cached_repository.heads,
434 &cached_repository.files,
435 &cached_repository.cache,
436 x,
437 )
438 .unwrap()
439 }
440 };
441 OrderedParallelIterator::new(move || revision_range, xform_ctor)
442 }
443}
444
/// Iterator over the changesets of a revision range.
///
/// Created by [`MercurialRepository::range_iter`].
pub struct ChangesetIter<'a> {
    /// Repository the changesets are read from.
    repository: &'a MercurialRepository,
    /// Revisions still to be produced.
    revisions_range: RevisionRange,
    /// LRU cache of manifests, keyed by revision.
    heads: Mutex<LruCache<Revision, Arc<Manifest>>>,
    /// LRU cache of per-file revision logs, keyed by the file path bytes.
    files: Mutex<LruCache<Vec<u8>, Arc<RevisionLog>>>,
    /// Cache handed to revision-log reads.
    cache: Cache,
}
453
454impl<'a> Iterator for ChangesetIter<'a> {
455 type Item = Changeset;
456
457 fn next(&mut self) -> Option<Self::Item> {
458 self.revisions_range.next().and_then(|revision| {
459 self.repository
460 .changeset(&self.heads, &self.files, &self.cache, revision)
461 })
462 }
463}
464
/// Iterator over the changeset headers of a revision range.
///
/// Created by [`MercurialRepository::range_header_iter`].
pub struct ChangesetHeaderIter<'a> {
    /// Repository the headers are read from.
    repository: &'a MercurialRepository,
    /// Revisions still to be produced.
    revisions_range: RevisionRange,
    /// Cache handed to revision-log reads.
    cache: Cache,
}
470
471impl<'a> Iterator for ChangesetHeaderIter<'a> {
472 type Item = ChangesetHeader;
473
474 fn next(&mut self) -> Option<Self::Item> {
475 self.revisions_range
476 .next()
477 .and_then(|revision| self.repository.changeset_header(&self.cache, revision))
478 }
479}
480
481fn load_to_vec<P: AsRef<Path>>(path: P) -> Result<Vec<u8>, ErrorKind> {
482 let mut f = match File::open(path.as_ref()) {
483 Ok(f) => f,
484 Err(err) => {
485 return Err(ErrorKind::InvalidPath(format!(
486 "Cannot open {:?}: {:?}",
487 path.as_ref(),
488 err
489 )));
490 }
491 };
492 let mut result = vec![];
493 f.read_to_end(&mut result).unwrap();
494 Ok(result)
495}
496
497pub fn file_content(data: &[u8]) -> &[u8] {
510 let (_, off) = extract_meta(data);
511 &data[off..]
512}
513
/// Byte sequence that both opens and closes the metadata header of a file revision.
const META_MARKER: &[u8] = b"\x01\n";
/// Length of [`META_MARKER`] in bytes.
const META_SZ: usize = 2;

/// Splits the metadata header off a file revision.
///
/// Returns the metadata bytes found between the opening and the closing
/// marker, together with the offset at which the file content starts.
///
/// * No opening marker: `(&[], 0)`.
/// * Opening marker without a closing one: `(&[], META_SZ)`, i.e. only the
///   opening marker is skipped.
fn extract_meta(file: &[u8]) -> (&[u8], usize) {
    // Also covers inputs shorter than the marker.
    if !file.starts_with(META_MARKER) {
        return (&[], 0);
    }

    let closing = file[META_SZ..]
        .windows(META_SZ)
        .position(|window| window == META_MARKER);

    if let Some(idx) = closing {
        // `idx` is relative to the end of the opening marker; the content
        // starts after both markers.
        let content_start = idx + META_SZ * 2;
        return (&file[META_SZ..content_start - META_SZ], content_start);
    }

    (&[], META_SZ)
}
539
540fn split_dict(dleft: &Manifest, dright: &Manifest, f: &mut Vec<Vec<u8>>) {
541 for (left, linfo) in &dleft.files {
542 let right = dright.files.get(left);
543 if right.is_none() || right.unwrap() != linfo {
544 f.push(left.clone());
545 }
546 }
547
548 for right in dright.files.keys() {
549 let left = dleft.files.get(right);
550 if left.is_none() {
551 f.push(right.clone());
552 }
553 }
554}