Expand description
§hg-parser
Mercurial repository parser written in the Rust programming language.
§Basic usage
§Add dependency to Cargo.toml
[dependencies]
hg-parser = "0.8"
§Use case - Analyse revision log and export to git fast-import format
use hg_parser::{file_content, FileType, ManifestEntryDetails, MercurialRepository, Revision};
use std::env::args;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::string::ParseError;
use std::time::Instant;
fn main() -> Result<(), Error> {
    let path: PathBuf = args().nth(1).expect("path not provided").parse()?;
    export_repo(path)
}
fn export_repo<P: AsRef<Path>>(path: P) -> Result<(), Error> {
    let start = Instant::now();
    let repo = MercurialRepository::open(path)?;
    let stdout = std::io::stdout();
    let mut writer = stdout.lock();
    for changeset in &repo {
        let revision = changeset.revision;
        eprintln!("rev: {:?}", revision);
        let header = &changeset.header;
        let mut branch = None;
        let mut closed = false;
        for (key, value) in &header.extra {
            if key == b"branch" {
                branch = Some(value.as_slice());
            }
            if key == b"close" && value == b"1" {
                closed = true;
            }
        }
        let mut branch: Vec<_> = branch.unwrap_or(b"master").into();
        for b in branch.iter_mut() {
            if *b == b' ' {
                *b = b'-';
            }
        }
        let user = String::from_utf8_lossy(&header.user);
        let desc = String::from_utf8_lossy(&header.comment);
        let time = header.time.timestamp_secs();
        let timezone = header.time.tz_offset_secs();
        let tz = format!("{:+03}{:02}", -timezone / 3600, ((-timezone % 3600) / 60));
        write!(writer, "reset refs/heads/")?;
        writer.write_all(&branch)?;
        write!(writer, "\ncommit refs/heads/")?;
        writer.write_all(&branch)?;
        writeln!(writer, "\nmark :{}", mark(revision))?;
        writeln!(writer, "author {} {} {}", user, time, tz)?;
        writeln!(writer, "committer {} {} {}", user, time, tz)?;
        writeln!(writer, "data {}", desc.len() + 1)?;
        writeln!(writer, "{}\n", desc)?;
        match (header.p1, header.p2) {
            (Some(p1), Some(p2)) => {
                writeln!(writer, "from :{}", mark(p1))?;
                writeln!(writer, "merge :{}", mark(p2))?;
            }
            (Some(p), None) | (None, Some(p)) => {
                writeln!(writer, "from :{}", mark(p))?;
            }
            _ => (),
        }
        for file in changeset.files {
            match (file.data, file.manifest_entry) {
                (None, None) => {
                    write!(writer, "D ")?;
                    writer.write_all(&file.path)?;
                    writeln!(writer)?;
                }
                (Some(data), Some(manifest_entry)) => {
                    write!(
                        writer,
                        "M {} inline ",
                        match manifest_entry.details {
                            ManifestEntryDetails::File(FileType::Symlink) => "120000",
                            ManifestEntryDetails::File(FileType::Executable) => "100755",
                            ManifestEntryDetails::Tree
                            | ManifestEntryDetails::File(FileType::Regular) => "100644",
                        }
                    )?;
                    writer.write_all(&file.path)?;
                    let data = file_content(&data);
                    writeln!(writer, "\ndata {}", data.len())?;
                    writer.write_all(data)?;
                }
                _ => panic!("Wrong file data!"),
            }
        }
        if closed {
            write!(writer, "reset refs/tags/archive/")?;
            writer.write_all(&branch)?;
            writeln!(writer, "\nfrom :{}\n", mark(revision))?;
            write!(writer, "reset refs/heads/")?;
            writer.write_all(&branch)?;
            writeln!(writer, "\nfrom 0000000000000000000000000000000000000000\n")?;
        }
    }
    for (rev, tag) in repo.tags().unwrap() {
        eprintln!("export tag {}", tag.name);
        writeln!(writer, "reset refs/tags/{}", tag.name).unwrap();
        writeln!(writer, "from :{}", mark(rev)).unwrap();
        writeln!(writer).unwrap();
    }
    eprintln!("Done. Elapsed: {:?}", start.elapsed());
    Ok(())
}
fn mark<R: Into<Revision>>(rev: R) -> usize {
    (rev.into() + 1).0 as usize
}
#[derive(Debug)]
#[allow(dead_code)]
enum Error {
    MercurialRepoException(hg_parser::ErrorKind),
    Parse(ParseError),
    IO(std::io::Error),
}
impl From<ParseError> for Error {
    fn from(value: ParseError) -> Self {
        Error::Parse(value)
    }
}
impl From<std::io::Error> for Error {
    fn from(value: std::io::Error) -> Self {
        Error::IO(value)
    }
}
impl From<hg_parser::ErrorKind> for Error {
    fn from(value: hg_parser::ErrorKind) -> Self {
        Error::MercurialRepoException(value)
    }
}
§Implementation details
hg-parser is based on repository parse code from mononoke project. Which basically based on original Mercurial source code. Version has some simplifications which may not work for you.
Structs§
- Cached version ofMercurialRepository.
- Changesetinfo about revision, contains revision number, header and files with bodies and metadata.
- Changeset’s file. Contains path info, internal blob from Mercurial (use file_content for actual file data) and meta information.
- Changeset’s header.
- Iterator overMercurialRepositoryrevisions.
- Manifest entry for file. Contains revision hash and file metainformation.
- Mercurial repository. Top-level structure for access to change sets and tags.
- Mercurial revision’s index.
- An iterator over a range ofRevision.
- Shares instance ofCachedMercurialRepositorybetween multiple readers.
Enums§
- File type. Can be regular, executable, or symlink.
- File meta information.
Functions§
- Extract blob data (raw file content) from internal Mercurial representation. This representation is by default returned by ChangesetIter iterator.