hg_git_fast_import/
lib.rs

1#[doc = include_str!("../README.md")]
2use std::{
3    borrow::Cow,
4    collections::{HashMap, HashSet},
5    fs::File,
6    io::{
7        self,
8        prelude::{Read, Write},
9    },
10    ops::Range,
11    path::{Path, PathBuf},
12    process::{Command, ExitStatus},
13};
14
15use lazy_static::lazy_static;
16use regex::Regex;
17use tracing::{info, trace};
18
19use ordered_parallel_iterator::OrderedParallelIterator;
20
21pub mod config;
22pub mod env;
23pub mod error;
24pub mod git;
25pub mod multi;
26pub mod single;
27pub mod tools;
28
29use self::config::RepositorySavedState;
30pub use error::ErrorKind;
31
32use hg_parser::{
33    file_content, Changeset, FileType, ManifestEntryDetails, MercurialRepository,
34    MercurialRepositoryOptions, Revision, SharedMercurialRepository,
35};
36
/// Reads the entire file at `filename` into a `String`.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] when the file cannot be opened or
/// read; per the standard library this includes content that is not valid
/// UTF-8.
pub fn read_file(filename: impl AsRef<Path>) -> io::Result<String> {
    let mut contents = String::new();
    File::open(filename)?.read_to_string(&mut contents)?;
    Ok(contents)
}
43
/// Decodes `bytes` as UTF-8 text, substituting U+FFFD for invalid sequences.
///
/// The result borrows from `bytes` when they are already valid UTF-8 and
/// only allocates when a replacement has to be made.
fn to_str(bytes: &[u8]) -> Cow<'_, str> {
    String::from_utf8_lossy(bytes)
}
47
48fn to_string(bytes: &[u8]) -> String {
49    to_str(bytes).into()
50}
51
52#[derive(Debug, thiserror::Error)]
53pub enum TargetRepositoryError {
54    #[error("unknown")]
55    Nope,
56    #[error("is not a directory")]
57    IsNotDir,
58    #[error("saved state does not exist")]
59    SavedStateDoesNotExist,
60    #[error("cannot init repository {0}")]
61    CannotInitRepo(ExitStatus),
62    #[error("cannot configure repository {0}")]
63    CannotConfigRepo(ExitStatus),
64    #[error("import failed {0}")]
65    ImportFailed(ExitStatus),
66    #[error("git failure {0}: {1}")]
67    GitFailure(ExitStatus, String),
68    #[error("io error {0}")]
69    IOError(std::io::Error),
70    #[error("verification failed")]
71    VerifyFail,
72}
73
74impl From<std::io::Error> for TargetRepositoryError {
75    fn from(value: std::io::Error) -> Self {
76        TargetRepositoryError::IOError(value)
77    }
78}
79
80pub trait TargetRepository {
81    fn start_import(
82        &mut self,
83        git_active_branches: Option<usize>,
84        default_branch: Option<&str>,
85    ) -> Result<(&mut dyn Write, Option<config::RepositorySavedState>, String), TargetRepositoryError>;
86
87    fn finish(&mut self) -> Result<(), TargetRepositoryError>;
88
89    fn verify(
90        &self,
91        _verified_repo: &str,
92        _subfolder: Option<&str>,
93    ) -> Result<(), TargetRepositoryError> {
94        Ok(())
95    }
96
97    fn save_state(&self, _state: RepositorySavedState) -> Result<(), TargetRepositoryError> {
98        Ok(())
99    }
100
101    fn get_saved_state(&self) -> Option<&RepositorySavedState> {
102        None
103    }
104
105    fn remote_list(&self) -> Result<HashSet<String>, TargetRepositoryError> {
106        unimplemented!();
107    }
108
109    fn remote_add(&self, _name: &str, _url: &str) -> Result<(), TargetRepositoryError> {
110        unimplemented!();
111    }
112
113    fn checkout(&self, _branch: &str) -> Result<(), TargetRepositoryError> {
114        unimplemented!();
115    }
116
117    fn fetch_all(&self) -> Result<(), TargetRepositoryError> {
118        unimplemented!();
119    }
120
121    fn merge_unrelated(&self, _branches: &[&str]) -> Result<(), TargetRepositoryError> {
122        unimplemented!();
123    }
124}
125
126#[derive(Debug, thiserror::Error)]
127pub enum SourceRepositoryError {
128    #[error("pull fail {0}")]
129    PullFail(String),
130}
131
132struct MercurialRepo<'a> {
133    path: PathBuf,
134    inner: SharedMercurialRepository,
135    config: &'a config::RepositoryConfig,
136    env: &'a env::Environment,
137}
138
139impl<'a> MercurialRepo<'a> {
140    /// Open Mercurial repository.
141    pub fn open<P: AsRef<Path>>(
142        path: P,
143        config: &'a config::RepositoryConfig,
144        ignore_unknown_requirements: bool,
145        env: &'a env::Environment,
146    ) -> Result<MercurialRepo<'a>, ErrorKind> {
147        Ok(Self {
148            path: path.as_ref().to_path_buf(),
149            inner: SharedMercurialRepository::new(MercurialRepository::open_with_options(
150                path,
151                MercurialRepositoryOptions {
152                    ignore_unknown_requirements,
153                },
154            )?),
155            config,
156            env,
157        })
158    }
159
160    /// Open Mercurial repository with pull by `hg pull -u` command before import.
161    /// Pull command triggered only if `env.source_pull` is `true`.
162    pub fn open_with_pull<P: AsRef<Path>>(
163        path: P,
164        config: &'a config::RepositoryConfig,
165        ignore_unknown_requirements: bool,
166        env: &'a env::Environment,
167    ) -> Result<MercurialRepo<'a>, ErrorKind> {
168        if env.source_pull {
169            let mut hg = Command::new("hg");
170            hg.args(["pull", "-u"]);
171
172            if env.cron {
173                hg.arg("-q");
174            }
175
176            let status = hg.current_dir(path.as_ref()).status()?;
177            if !status.success() {
178                return Err(SourceRepositoryError::PullFail(format!(
179                    "Cannot pull {}",
180                    path.as_ref().to_str().unwrap()
181                ))
182                .into());
183            }
184        }
185
186        Self::open(path, config, ignore_unknown_requirements, env)
187    }
188
189    fn path(&self) -> &Path {
190        self.path.as_path()
191    }
192
193    fn verify_heads(&self, _allow_unnamed_heads: bool) -> Result<bool, ErrorKind> {
194        Ok(true)
195    }
196
197    fn changelog_len(&self) -> Result<usize, ErrorKind> {
198        Ok(self.inner.last_rev().0 as usize)
199    }
200
201    fn fixup_user(&self, user: &str) -> Result<String, ErrorKind> {
202        if let Some(ref authors) = self.config.authors {
203            if let Some(remap) = authors.get(user).cloned() {
204                return Ok(remap);
205            }
206        }
207
208        if let Some(ref authors) = self.env.authors {
209            if let Some(remap) = authors.get(user).cloned() {
210                return Ok(remap);
211            }
212        }
213
214        lazy_static! {
215            static ref RE: Regex = Regex::new("([^<]+) ?(<[^>]*>)$").unwrap();
216        }
217
218        let (name, email) = if let Some(caps) = RE.captures(user) {
219            (
220                caps.get(1).unwrap().as_str().trim_end(),
221                caps.get(2).unwrap().as_str(),
222            )
223        } else {
224            return Err(ErrorKind::WrongUser(user.to_string()));
225        };
226
227        Ok(format!("{} {}", name, email))
228    }
229
230    fn mark<R: Into<usize>>(&self, revision: R) -> usize {
231        revision.into() + 1 + self.config.offset.unwrap_or(0)
232    }
233
234    fn range(&self, range: Range<usize>) -> OrderedParallelIterator<Changeset> {
235        self.inner.par_range_iter(range.into())
236    }
237
238    fn export_commit(
239        &self,
240        changeset: &mut Changeset,
241        count: usize,
242        brmap: &mut HashMap<String, String>,
243        output: &mut dyn Write,
244        default_branch: &str,
245    ) -> Result<usize, ErrorKind> {
246        let header = &changeset.header;
247
248        let user = self.fixup_user(std::str::from_utf8(&header.user)?)?;
249
250        let mut branch = None;
251        let mut closed = false;
252        for (key, value) in &header.extra {
253            if key == b"branch" {
254                branch = Some(value.as_slice());
255            }
256
257            if key == b"close" && value == b"1" {
258                closed = true;
259            }
260        }
261        let branch: String =
262            std::str::from_utf8(branch.unwrap_or(default_branch.as_bytes()))?.into();
263
264        let branch = brmap.entry(branch.clone()).or_insert_with(|| {
265            sanitize_branchname(
266                &branch,
267                if branch != default_branch || self.config.prefix_default_branch {
268                    self.config.branch_prefix.as_ref()
269                } else {
270                    None
271                },
272                self.env.fix_wrong_branchname,
273            )
274        });
275
276        let revision = changeset.revision;
277
278        if header.p1.is_some() || header.p2.is_some() || revision != 0.into() {
279            writeln!(output, "reset refs/heads/{}", branch)?;
280        }
281        let desc = String::from_utf8_lossy(&header.comment);
282
283        let time = header.time.timestamp_secs();
284        let timezone = header.time.tz_offset_secs();
285        let tz = format!("{:+03}{:02}", -timezone / 3600, ((-timezone % 3600) / 60));
286
287        writeln!(output, "commit refs/heads/{}", branch)?;
288        let mark = self.mark(revision);
289        writeln!(output, "mark :{}", mark)?;
290
291        writeln!(output, "author {} {} {}", user, time, tz)?;
292        writeln!(output, "committer {} {} {}", user, time, tz)?;
293        writeln!(output, "data {}", desc.len() + 1)?;
294        writeln!(output, "{}\n", desc)?;
295
296        match (header.p1, header.p2) {
297            (Some(p1), Some(p2)) => {
298                writeln!(output, "from :{}", self.mark(p1))?;
299                writeln!(output, "merge :{}", self.mark(p2))?;
300            }
301            (Some(p), None) | (None, Some(p)) => {
302                writeln!(output, "from :{}", self.mark(p))?;
303            }
304            _ => (),
305        }
306
307        info!(
308            "{} ({}) | {} | {} | {} | {}",
309            mark, revision.0, branch, user, desc, header.time
310        );
311
312        if self.env.cron {
313            eprintln!(
314                "{} ({}) | {} | {} | {} | {}",
315                mark, revision.0, branch, user, desc, header.time
316            );
317        }
318
319        let prefix = strip_leading_slash(self.config.path_prefix.as_ref(), "");
320        for file in &mut changeset.files {
321            match (&mut file.data, &mut file.manifest_entry) {
322                (None, None) => {
323                    write!(output, "D {}", prefix)?;
324                    output.write_all(&file.path)?;
325                    writeln!(output)?;
326                }
327                (Some(data), Some(manifest_entry)) => {
328                    write!(
329                        output,
330                        "M {} inline {}",
331                        match manifest_entry.details {
332                            ManifestEntryDetails::File(FileType::Symlink) => "120000",
333                            ManifestEntryDetails::File(FileType::Executable) => "100755",
334                            ManifestEntryDetails::Tree
335                            | ManifestEntryDetails::File(FileType::Regular) => "100644",
336                        },
337                        prefix
338                    )?;
339                    output.write_all(&file.path)?;
340                    let data = file_content(data);
341                    writeln!(output, "\ndata {}", data.len())?;
342                    output.write_all(data)?;
343                }
344                _ => {
345                    return Err(ErrorKind::WrongFileData(
346                        String::from_utf8_lossy(&file.path).into(),
347                    ))
348                }
349            }
350        }
351
352        if closed {
353            writeln!(output, "reset refs/tags/archive/{}", branch)?;
354            writeln!(output, "from :{}\n", self.mark(revision))?;
355
356            writeln!(output, "reset refs/heads/{}", branch)?;
357            writeln!(output, "from 0000000000000000000000000000000000000000\n")?;
358        }
359        Ok(count + 1)
360    }
361
362    fn export_tags(
363        &self,
364        range: Range<usize>,
365        mut count: usize,
366        output: &mut dyn Write,
367    ) -> Result<usize, ErrorKind> {
368        info!("Exporting tags");
369        for (revision, tag) in self
370            .inner
371            .tags()?
372            .range(Revision::from(range.start as u32)..Revision::from(range.end as u32))
373        {
374            let tag = sanitize_name(&tag.name, self.config.tag_prefix.as_ref(), "tag");
375
376            writeln!(output, "reset refs/tags/{}", tag).unwrap();
377            writeln!(output, "from :{}", self.mark(*revision)).unwrap();
378            writeln!(output).unwrap();
379            count += 1;
380        }
381        Ok(count)
382    }
383}
384
/// Joins an optional `prefix` and `x` with a `/`.
///
/// Without a prefix, `x` is returned unchanged. Despite the name, no slash is
/// ever removed.
fn strip_leading_slash(prefix: Option<&String>, x: &str) -> String {
    match prefix {
        Some(p) => format!("{}/{}", p, x),
        None => x.to_string(),
    }
}
388
389fn sanitize_branchname(name: &str, prefix: Option<&String>, fix_branch_name: bool) -> String {
390    let branchname = sanitize_name(name, prefix, "branch");
391    if !fix_branch_name {
392        return branchname;
393    }
394    let mut result = String::new();
395    let mut chars = branchname.chars().peekable();
396    let mut last = None;
397    while let Some(&c) = chars.peek() {
398        if c != '/' {
399            break;
400        }
401        result.push(c);
402        last = chars.next();
403    }
404    while let Some(&c) = chars.peek() {
405        let c = match c {
406            '\0'..=' ' | '~' | '^' | ':' | '\\' => '-',
407            '.' if last == Some('.') || last.is_none() => '-',
408            c => c,
409        };
410        result.push(c);
411        last = chars.next();
412    }
413    if result.ends_with('/') {
414        result.remove(result.len() - 1);
415        result.push('-');
416    }
417    if result.ends_with(".lock") {
418        result.replace_range((result.len() - 5)..=(result.len() - 5), "-");
419    }
420    result
421}
422
423fn sanitize_name(name: &str, prefix: Option<&String>, what: &str) -> String {
424    trace!("Sanitize {} '{}'", what, name);
425    prefix.map_or_else(|| name.into(), |p| format!("{}{}", p, name))
426
427    //TODO: git-check-ref-format
428}
429
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `sanitize_branchname` over plain names, prefixes,
    /// whitespace, dots, `.lock` suffixes and slashes.
    #[test]
    fn sanitize_branchnames() {
        // (input, prefix, fix_branch_name, expected)
        let cases: [(&str, Option<&str>, bool, &str); 11] = [
            ("normal", None, false, "normal"),
            ("normal", None, true, "normal"),
            ("////normal", None, true, "////normal"),
            ("with spaces  ", None, true, "with-spaces--"),
            ("with spaces  ", Some("prefix-"), true, "prefix-with-spaces--"),
            (".dotatstart", None, true, "-dotatstart"),
            ("dots.in.the.middle", None, true, "dots.in.the.middle"),
            ("doubledots..", None, true, "doubledots.-"),
            ("...", None, true, "---"),
            ("branch.lock", None, true, "branch-lock"),
            ("//qqq//", None, true, "//qqq/-"),
        ];

        for &(input, prefix, fix, expected) in cases.iter() {
            let prefix = prefix.map(String::from);
            assert_eq!(
                sanitize_branchname(input, prefix.as_ref(), fix),
                expected,
                "input: {:?}",
                input
            );
        }
    }
}
466}