Expand description
§hg-parser
Mercurial repository parser written in the Rust programming language.
§Basic usage
§Add dependency to Cargo.toml
[dependencies]
hg-parser = "0.9"
§Use case - Analyse revision log and export to git fast-import
format
use std::{
env::args,
io::Write,
path::{Path, PathBuf},
string::ParseError,
time::Instant,
};
use hg_parser::{file_content, FileType, ManifestEntryDetails, MercurialRepository, Revision};
fn main() -> Result<(), Error> {
let path: PathBuf = args().nth(1).expect("path not provided").parse()?;
export_repo(path)
}
fn export_repo<P: AsRef<Path>>(path: P) -> Result<(), Error> {
let start = Instant::now();
let repo = MercurialRepository::open(path)?;
let stdout = std::io::stdout();
let mut writer = stdout.lock();
for changeset in &repo {
let revision = changeset.revision;
eprintln!("rev: {:?}", revision);
let header = &changeset.header;
let mut branch = None;
let mut closed = false;
for (key, value) in &header.extra {
if key == b"branch" {
branch = Some(value.as_slice());
}
if key == b"close" && value == b"1" {
closed = true;
}
}
let mut branch: Vec<_> = branch.unwrap_or(b"master").into();
for b in branch.iter_mut() {
if *b == b' ' {
*b = b'-';
}
}
let user = String::from_utf8_lossy(&header.user);
let desc = String::from_utf8_lossy(&header.comment);
let time = header.time.timestamp_secs();
let timezone = header.time.tz_offset_secs();
let tz = format!("{:+03}{:02}", -timezone / 3600, ((-timezone % 3600) / 60));
write!(writer, "reset refs/heads/")?;
writer.write_all(&branch)?;
write!(writer, "\ncommit refs/heads/")?;
writer.write_all(&branch)?;
writeln!(writer, "\nmark :{}", mark(revision))?;
writeln!(writer, "author {} {} {}", user, time, tz)?;
writeln!(writer, "committer {} {} {}", user, time, tz)?;
writeln!(writer, "data {}", desc.len() + 1)?;
writeln!(writer, "{}\n", desc)?;
match (header.p1, header.p2) {
(Some(p1), Some(p2)) => {
writeln!(writer, "from :{}", mark(p1))?;
writeln!(writer, "merge :{}", mark(p2))?;
}
(Some(p), None) | (None, Some(p)) => {
writeln!(writer, "from :{}", mark(p))?;
}
_ => (),
}
for file in changeset.files {
match (file.data, file.manifest_entry) {
(None, None) => {
write!(writer, "D ")?;
writer.write_all(&file.path)?;
writeln!(writer)?;
}
(Some(data), Some(manifest_entry)) => {
write!(
writer,
"M {} inline ",
match manifest_entry.details {
ManifestEntryDetails::File(FileType::Symlink) => "120000",
ManifestEntryDetails::File(FileType::Executable) => "100755",
ManifestEntryDetails::Tree
| ManifestEntryDetails::File(FileType::Regular) => "100644",
}
)?;
writer.write_all(&file.path)?;
let data = file_content(&data);
writeln!(writer, "\ndata {}", data.len())?;
writer.write_all(data)?;
}
_ => panic!("Wrong file data!"),
}
}
if closed {
write!(writer, "reset refs/tags/archive/")?;
writer.write_all(&branch)?;
writeln!(writer, "\nfrom :{}\n", mark(revision))?;
write!(writer, "reset refs/heads/")?;
writer.write_all(&branch)?;
writeln!(writer, "\nfrom 0000000000000000000000000000000000000000\n")?;
}
}
for (rev, tag) in repo.tags().unwrap() {
eprintln!("export tag {}", tag.name);
writeln!(writer, "reset refs/tags/{}", tag.name).unwrap();
writeln!(writer, "from :{}", mark(rev)).unwrap();
writeln!(writer).unwrap();
}
eprintln!("Done. Elapsed: {:?}", start.elapsed());
Ok(())
}
fn mark<R: Into<Revision>>(rev: R) -> usize {
(rev.into() + 1).0 as usize
}
#[derive(Debug)]
#[allow(dead_code)]
enum Error {
MercurialRepoException(hg_parser::ErrorKind),
Parse(ParseError),
IO(std::io::Error),
}
impl From<ParseError> for Error {
fn from(value: ParseError) -> Self {
Error::Parse(value)
}
}
impl From<std::io::Error> for Error {
fn from(value: std::io::Error) -> Self {
Error::IO(value)
}
}
impl From<hg_parser::ErrorKind> for Error {
fn from(value: hg_parser::ErrorKind) -> Self {
Error::MercurialRepoException(value)
}
}
§Implementation details
hg-parser is based on repository parse code from mononoke project. Which basically based on original Mercurial source code. Version has some simplifications which may not work for you.
Structs§
- Cached version of
MercurialRepository
. Changeset
info about revision, contains revision number, header and files with bodies and metadata.- Changeset’s file. Contains path info, internal blob from Mercurial (use file_content for actual file data) and meta information.
- Changeset’s header.
- Iterator over
MercurialRepository
revisions. - Manifest entry for file. Contains revision hash and file metainformation.
- Mercurial repository. Top-level structure for access to change sets and tags.
- Options for mercurial repository.
- Mercurial revision’s index.
- An iterator over a range of
Revision
. - Shares instance of
CachedMercurialRepository
between multiple readers.
Enums§
- File type. Can be regular, executable, or symlink.
- File meta information.
Functions§
- Extract blob data (raw file content) from internal Mercurial representation. This representation is by default returned by ChangesetIter iterator.