jujube_lib/
git_store.rs

1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{Debug, Error, Formatter};
16use std::io::Cursor;
17use std::io::Read;
18use std::path::PathBuf;
19use std::sync::Mutex;
20use std::time::Duration;
21
22use git2::Oid;
23use protobuf::Message;
24
25use crate::repo_path::{DirRepoPath, FileRepoPath};
26use crate::store::{
27    ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictPart, FileId, MillisSinceEpoch,
28    Signature, Store, StoreError, StoreResult, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
29};
30use backoff::{ExponentialBackoff, Operation};
31use std::ops::Deref;
32
33const COMMITS_NOTES_REF: &str = "refs/notes/jj/commits";
34const CONFLICTS_NOTES_REF: &str = "refs/notes/jj/conflicts";
35const CONFLICT_SUFFIX: &str = ".jjconflict";
36
37impl From<git2::Error> for StoreError {
38    fn from(err: git2::Error) -> Self {
39        match err.code() {
40            git2::ErrorCode::NotFound => StoreError::NotFound,
41            _other => StoreError::Other(err.to_string()),
42        }
43    }
44}
45
46pub struct GitStore {
47    repo: Mutex<git2::Repository>,
48    empty_tree_id: TreeId,
49}
50
51impl GitStore {
52    pub fn load(path: PathBuf) -> Self {
53        let repo = Mutex::new(git2::Repository::open(path).unwrap());
54        let empty_tree_id =
55            TreeId(hex::decode("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap());
56        GitStore {
57            repo,
58            empty_tree_id,
59        }
60    }
61}
62
63fn signature_from_git(signature: git2::Signature) -> Signature {
64    let name = signature.name().unwrap_or("<no name>").to_owned();
65    let email = signature.email().unwrap_or("<no email>").to_owned();
66    let timestamp = MillisSinceEpoch((signature.when().seconds() * 1000) as u64);
67    let tz_offset = signature.when().offset_minutes();
68    Signature {
69        name,
70        email,
71        timestamp: Timestamp {
72            timestamp,
73            tz_offset,
74        },
75    }
76}
77
78fn signature_to_git(signature: &Signature) -> git2::Signature {
79    let name = &signature.name;
80    let email = &signature.email;
81    let time = git2::Time::new(
82        (signature.timestamp.timestamp.0 / 1000) as i64,
83        signature.timestamp.tz_offset,
84    );
85    git2::Signature::new(&name, &email, &time).unwrap()
86}
87
88fn serialize_note(commit: &Commit) -> String {
89    let mut proto = crate::protos::store::Commit::new();
90    proto.is_open = commit.is_open;
91    proto.is_pruned = commit.is_pruned;
92    proto.change_id = commit.change_id.0.to_vec();
93    for predecessor in &commit.predecessors {
94        proto.predecessors.push(predecessor.0.to_vec());
95    }
96    let bytes = proto.write_to_bytes().unwrap();
97    hex::encode(bytes)
98}
99
100fn deserialize_note(commit: &mut Commit, note: &str) {
101    let bytes = hex::decode(note).unwrap();
102    let mut cursor = Cursor::new(bytes);
103    let proto: crate::protos::store::Commit = protobuf::parse_from_reader(&mut cursor).unwrap();
104    commit.is_open = proto.is_open;
105    commit.is_pruned = proto.is_pruned;
106    commit.change_id = ChangeId(proto.change_id);
107    for predecessor in &proto.predecessors {
108        commit.predecessors.push(CommitId(predecessor.clone()));
109    }
110}
111
112fn write_note(
113    git_repo: &git2::Repository,
114    committer: &git2::Signature,
115    notes_ref: &str,
116    oid: git2::Oid,
117    note: &str,
118) -> Result<(), git2::Error> {
119    // It seems that libgit2 doesn't retry when .git/refs/notes/jj/commits.lock
120    // already exists, so we do the retrying ourselves.
121    // TODO: Report this to libgit2.
122    let notes_ref_lock = format!("{}.lock", notes_ref);
123    let mut try_write_note = || {
124        let note_status = git_repo.note(&committer, &committer, Some(notes_ref), oid, note, false);
125        match note_status {
126            Err(err) if err.message().contains(&notes_ref_lock) => {
127                Err(backoff::Error::Transient(err))
128            }
129            Err(err) => Err(backoff::Error::Permanent(err)),
130            Ok(_) => Ok(()),
131        }
132    };
133    let mut backoff = ExponentialBackoff::default();
134    backoff.initial_interval = Duration::from_millis(1);
135    backoff.max_elapsed_time = Some(Duration::from_secs(10));
136    try_write_note
137        .retry(&mut backoff)
138        .map_err(|err| match err {
139            backoff::Error::Permanent(err) => err,
140            backoff::Error::Transient(err) => err,
141        })?;
142    Ok(())
143}
144
145impl Debug for GitStore {
146    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
147        f.debug_struct("GitStore")
148            .field("path", &self.repo.lock().unwrap().path())
149            .finish()
150    }
151}
152
153impl Store for GitStore {
154    fn hash_length(&self) -> usize {
155        20
156    }
157
158    fn git_repo(&self) -> Option<git2::Repository> {
159        let path = self.repo.lock().unwrap().path().to_owned();
160        Some(git2::Repository::open(&path).unwrap())
161    }
162
163    fn read_file(&self, _path: &FileRepoPath, id: &FileId) -> StoreResult<Box<dyn Read>> {
164        if id.0.len() != self.hash_length() {
165            return Err(StoreError::NotFound);
166        }
167        let locked_repo = self.repo.lock().unwrap();
168        let blob = locked_repo
169            .find_blob(Oid::from_bytes(id.0.as_slice()).unwrap())
170            .unwrap();
171        let content = blob.content().to_owned();
172        Ok(Box::new(Cursor::new(content)))
173    }
174
175    fn write_file(&self, _path: &FileRepoPath, contents: &mut dyn Read) -> StoreResult<FileId> {
176        let mut bytes = Vec::new();
177        contents.read_to_end(&mut bytes).unwrap();
178        let locked_repo = self.repo.lock().unwrap();
179        let oid = locked_repo.blob(bytes.as_slice()).unwrap();
180        Ok(FileId(oid.as_bytes().to_vec()))
181    }
182
183    fn read_symlink(&self, _path: &FileRepoPath, id: &SymlinkId) -> Result<String, StoreError> {
184        if id.0.len() != self.hash_length() {
185            return Err(StoreError::NotFound);
186        }
187        let locked_repo = self.repo.lock().unwrap();
188        let blob = locked_repo
189            .find_blob(Oid::from_bytes(id.0.as_slice()).unwrap())
190            .unwrap();
191        let target = String::from_utf8(blob.content().to_owned()).unwrap();
192        Ok(target)
193    }
194
195    fn write_symlink(&self, _path: &FileRepoPath, target: &str) -> Result<SymlinkId, StoreError> {
196        let locked_repo = self.repo.lock().unwrap();
197        let oid = locked_repo.blob(target.as_bytes()).unwrap();
198        Ok(SymlinkId(oid.as_bytes().to_vec()))
199    }
200
201    fn empty_tree_id(&self) -> &TreeId {
202        &self.empty_tree_id
203    }
204
205    fn read_tree(&self, _path: &DirRepoPath, id: &TreeId) -> StoreResult<Tree> {
206        if id == &self.empty_tree_id {
207            return Ok(Tree::default());
208        }
209        if id.0.len() != self.hash_length() {
210            return Err(StoreError::NotFound);
211        }
212
213        let locked_repo = self.repo.lock().unwrap();
214        let git_tree = locked_repo
215            .find_tree(Oid::from_bytes(id.0.as_slice()).unwrap())
216            .unwrap();
217        let mut tree = Tree::default();
218        for entry in git_tree.iter() {
219            let name = entry.name().unwrap();
220            let (name, value) = match entry.kind().unwrap() {
221                git2::ObjectType::Tree => {
222                    let id = TreeId(entry.id().as_bytes().to_vec());
223                    (entry.name().unwrap(), TreeValue::Tree(id))
224                }
225                git2::ObjectType::Blob => match entry.filemode() {
226                    0o100644 => {
227                        let id = FileId(entry.id().as_bytes().to_vec());
228                        if name.ends_with(CONFLICT_SUFFIX) {
229                            (
230                                &name[0..name.len() - CONFLICT_SUFFIX.len()],
231                                TreeValue::Conflict(ConflictId(entry.id().as_bytes().to_vec())),
232                            )
233                        } else {
234                            (
235                                name,
236                                TreeValue::Normal {
237                                    id,
238                                    executable: false,
239                                },
240                            )
241                        }
242                    }
243                    0o100755 => {
244                        let id = FileId(entry.id().as_bytes().to_vec());
245                        (
246                            name,
247                            TreeValue::Normal {
248                                id,
249                                executable: true,
250                            },
251                        )
252                    }
253                    0o120000 => {
254                        let id = SymlinkId(entry.id().as_bytes().to_vec());
255                        (name, TreeValue::Symlink(id))
256                    }
257                    mode => panic!("unexpected file mode {:?}", mode),
258                },
259                git2::ObjectType::Commit => {
260                    let id = CommitId(entry.id().as_bytes().to_vec());
261                    (name, TreeValue::GitSubmodule(id))
262                }
263                kind => panic!("unexpected object type {:?}", kind),
264            };
265            tree.set(name.to_string(), value);
266        }
267        Ok(tree)
268    }
269
270    fn write_tree(&self, _path: &DirRepoPath, contents: &Tree) -> StoreResult<TreeId> {
271        let locked_repo = self.repo.lock().unwrap();
272        let mut builder = locked_repo.treebuilder(None).unwrap();
273        for entry in contents.entries() {
274            let name = entry.name().to_owned();
275            let (name, id, filemode) = match entry.value() {
276                TreeValue::Normal {
277                    id,
278                    executable: false,
279                } => (name, &id.0, 0o100644),
280                TreeValue::Normal {
281                    id,
282                    executable: true,
283                } => (name, &id.0, 0o100755),
284                TreeValue::Symlink(id) => (name, &id.0, 0o120000),
285                TreeValue::Tree(id) => (name, &id.0, 0o040000),
286                TreeValue::GitSubmodule(id) => (name, &id.0, 0o160000),
287                TreeValue::Conflict(id) => (name + CONFLICT_SUFFIX, &id.0, 0o100644),
288            };
289            builder
290                .insert(name, Oid::from_bytes(id).unwrap(), filemode)
291                .unwrap();
292        }
293        let oid = builder.write().unwrap();
294        Ok(TreeId(oid.as_bytes().to_vec()))
295    }
296
297    fn read_commit(&self, id: &CommitId) -> StoreResult<Commit> {
298        if id.0.len() != self.hash_length() {
299            return Err(StoreError::NotFound);
300        }
301
302        let locked_repo = self.repo.lock().unwrap();
303        let git_commit_id = Oid::from_bytes(id.0.as_slice())?;
304        let commit = locked_repo.find_commit(git_commit_id)?;
305        let change_id = ChangeId(id.0.clone().as_slice()[0..16].to_vec());
306        let parents: Vec<_> = commit
307            .parent_ids()
308            .map(|oid| CommitId(oid.as_bytes().to_vec()))
309            .collect();
310        let tree_id = TreeId(commit.tree_id().as_bytes().to_vec());
311        let description = commit.message().unwrap_or("<no message>").to_owned();
312        let author = signature_from_git(commit.author());
313        let committer = signature_from_git(commit.committer());
314
315        let mut commit = Commit {
316            parents,
317            predecessors: vec![],
318            root_tree: tree_id,
319            change_id,
320            description,
321            author,
322            committer,
323            is_open: false,
324            is_pruned: false,
325        };
326
327        let maybe_note = locked_repo
328            .find_note(Some(COMMITS_NOTES_REF), git_commit_id)
329            .ok();
330        if let Some(note) = maybe_note {
331            deserialize_note(&mut commit, note.message().unwrap());
332        }
333
334        Ok(commit)
335    }
336
337    fn write_commit(&self, contents: &Commit) -> StoreResult<CommitId> {
338        // TODO: We shouldn't have to create an in-memory index just to write an
339        // object...
340        let locked_repo = self.repo.lock().unwrap();
341        let git_tree = locked_repo.find_tree(Oid::from_bytes(contents.root_tree.0.as_slice())?)?;
342        let author = signature_to_git(&contents.author);
343        let committer = signature_to_git(&contents.committer);
344        let message = &contents.description;
345
346        let mut parents = vec![];
347        for parent_id in &contents.parents {
348            let parent_git_commit =
349                locked_repo.find_commit(Oid::from_bytes(parent_id.0.as_slice())?)?;
350            parents.push(parent_git_commit);
351        }
352        let parent_refs: Vec<_> = parents.iter().collect();
353        let git_id =
354            locked_repo.commit(None, &author, &committer, &message, &git_tree, &parent_refs)?;
355        let id = CommitId(git_id.as_bytes().to_vec());
356        let note = serialize_note(contents);
357
358        // TODO: Include the extra commit data in commit headers instead of a ref.
359        // Unfortunately, it doesn't seem like libgit2-rs supports that. Perhaps
360        // we'll have to serialize/deserialize the commit data ourselves.
361        write_note(
362            locked_repo.deref(),
363            &committer,
364            COMMITS_NOTES_REF,
365            git_id,
366            &note,
367        )?;
368        Ok(id)
369    }
370
371    fn read_conflict(&self, id: &ConflictId) -> StoreResult<Conflict> {
372        let mut file = self.read_file(&FileRepoPath::from("unused"), &FileId(id.0.clone()))?;
373        let mut data = String::new();
374        file.read_to_string(&mut data)?;
375        let json: serde_json::Value = serde_json::from_str(&data).unwrap();
376        Ok(Conflict {
377            removes: conflict_part_list_from_json(json.get("removes").unwrap()),
378            adds: conflict_part_list_from_json(json.get("adds").unwrap()),
379        })
380    }
381
382    fn write_conflict(&self, conflict: &Conflict) -> StoreResult<ConflictId> {
383        let json = serde_json::json!({
384            "removes": conflict_part_list_to_json(&conflict.removes),
385            "adds": conflict_part_list_to_json(&conflict.adds),
386        });
387        let json_string = json.to_string();
388        let bytes = json_string.as_bytes();
389        let locked_repo = self.repo.lock().unwrap();
390        let oid = locked_repo.blob(bytes).unwrap();
391        let signature = git2::Signature::now("Jujube", "jj@example.com").unwrap();
392        let note_result = write_note(
393            locked_repo.deref(),
394            &signature,
395            CONFLICTS_NOTES_REF,
396            oid,
397            "Conflict object used by Jujube",
398        );
399        match note_result {
400            // It's fine if the conflict already existed (no need to update the note), but
401            // any other error is unexpected.
402            Err(err) if err.code() != git2::ErrorCode::Exists => {
403                return Err(StoreError::from(err));
404            }
405            _ => {}
406        }
407        Ok(ConflictId(oid.as_bytes().to_vec()))
408    }
409}
410
411fn conflict_part_list_to_json(parts: &[ConflictPart]) -> serde_json::Value {
412    serde_json::Value::Array(parts.iter().map(conflict_part_to_json).collect())
413}
414
415fn conflict_part_list_from_json(json: &serde_json::Value) -> Vec<ConflictPart> {
416    json.as_array()
417        .unwrap()
418        .iter()
419        .map(conflict_part_from_json)
420        .collect()
421}
422
423fn conflict_part_to_json(part: &ConflictPart) -> serde_json::Value {
424    serde_json::json!({
425        "value": tree_value_to_json(&part.value),
426    })
427}
428
429fn conflict_part_from_json(json: &serde_json::Value) -> ConflictPart {
430    let json_value = json.get("value").unwrap();
431    ConflictPart {
432        value: tree_value_from_json(json_value),
433    }
434}
435
436fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
437    match value {
438        TreeValue::Normal { id, executable } => serde_json::json!({
439             "file": {
440                 "id": id.hex(),
441                 "executable": executable,
442             },
443        }),
444        TreeValue::Symlink(id) => serde_json::json!({
445             "symlink_id": id.hex(),
446        }),
447        TreeValue::Tree(id) => serde_json::json!({
448             "tree_id": id.hex(),
449        }),
450        TreeValue::GitSubmodule(id) => serde_json::json!({
451             "submodule_id": id.hex(),
452        }),
453        TreeValue::Conflict(id) => serde_json::json!({
454             "conflict_id": id.hex(),
455        }),
456    }
457}
458
459fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
460    if let Some(json_file) = json.get("file") {
461        TreeValue::Normal {
462            id: FileId(bytes_vec_from_json(json_file.get("id").unwrap())),
463            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
464        }
465    } else if let Some(json_id) = json.get("symlink_id") {
466        TreeValue::Symlink(SymlinkId(bytes_vec_from_json(json_id)))
467    } else if let Some(json_id) = json.get("tree_id") {
468        TreeValue::Tree(TreeId(bytes_vec_from_json(json_id)))
469    } else if let Some(json_id) = json.get("submodule_id") {
470        TreeValue::GitSubmodule(CommitId(bytes_vec_from_json(json_id)))
471    } else if let Some(json_id) = json.get("conflict_id") {
472        TreeValue::Conflict(ConflictId(bytes_vec_from_json(json_id)))
473    } else {
474        panic!("unexpected json value in conflict: {:#?}", json);
475    }
476}
477
478fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
479    hex::decode(value.as_str().unwrap()).unwrap()
480}
481
482#[cfg(test)]
483mod tests {
484
485    use crate::store::{FileId, MillisSinceEpoch};
486
487    use super::*;
488
489    #[test]
490    fn read_plain_git_commit() {
491        let temp_dir = tempfile::tempdir().unwrap();
492        let git_repo_path = temp_dir.path();
493        let git_repo = git2::Repository::init(git_repo_path.clone()).unwrap();
494
495        // Add a commit with some files in
496        let blob1 = git_repo.blob(b"content1").unwrap();
497        let blob2 = git_repo.blob(b"normal").unwrap();
498        let mut dir_tree_builder = git_repo.treebuilder(None).unwrap();
499        dir_tree_builder.insert("normal", blob1, 0o100644).unwrap();
500        dir_tree_builder.insert("symlink", blob2, 0o120000).unwrap();
501        let dir_tree_id = dir_tree_builder.write().unwrap();
502        let mut root_tree_builder = git_repo.treebuilder(None).unwrap();
503        root_tree_builder
504            .insert("dir", dir_tree_id, 0o040000)
505            .unwrap();
506        let root_tree_id = root_tree_builder.write().unwrap();
507        let git_author = git2::Signature::new(
508            "git author",
509            "git.author@example.com",
510            &git2::Time::new(1000, 60),
511        )
512        .unwrap();
513        let git_committer = git2::Signature::new(
514            "git committer",
515            "git.committer@example.com",
516            &git2::Time::new(2000, -480),
517        )
518        .unwrap();
519        let git_tree = git_repo.find_tree(root_tree_id).unwrap();
520        let git_commit_id = git_repo
521            .commit(
522                None,
523                &git_author,
524                &git_committer,
525                "git commit message",
526                &git_tree,
527                &[],
528            )
529            .unwrap();
530        let commit_id = CommitId(git_commit_id.as_bytes().to_vec());
531
532        let store = GitStore::load(git_repo_path.to_owned());
533        let commit = store.read_commit(&commit_id).unwrap();
534        assert_eq!(
535            &commit.change_id,
536            &ChangeId(commit_id.0.as_slice()[0..16].to_vec())
537        );
538        assert_eq!(commit.parents, vec![]);
539        assert_eq!(commit.predecessors, vec![]);
540        assert_eq!(commit.root_tree.0.as_slice(), root_tree_id.as_bytes());
541        assert_eq!(commit.is_open, false);
542        assert_eq!(commit.is_pruned, false);
543        assert_eq!(commit.description, "git commit message");
544        assert_eq!(commit.author.name, "git author");
545        assert_eq!(commit.author.email, "git.author@example.com");
546        assert_eq!(
547            commit.author.timestamp.timestamp,
548            MillisSinceEpoch(1000 * 1000)
549        );
550        assert_eq!(commit.author.timestamp.tz_offset, 60);
551        assert_eq!(commit.committer.name, "git committer");
552        assert_eq!(commit.committer.email, "git.committer@example.com");
553        assert_eq!(
554            commit.committer.timestamp.timestamp,
555            MillisSinceEpoch(2000 * 1000)
556        );
557        assert_eq!(commit.committer.timestamp.tz_offset, -480);
558
559        let root_tree = store
560            .read_tree(
561                &DirRepoPath::root(),
562                &TreeId(root_tree_id.as_bytes().to_vec()),
563            )
564            .unwrap();
565        let mut root_entries = root_tree.entries();
566        let dir = root_entries.next().unwrap();
567        assert_eq!(root_entries.next(), None);
568        assert_eq!(dir.name(), "dir");
569        assert_eq!(
570            dir.value(),
571            &TreeValue::Tree(TreeId(dir_tree_id.as_bytes().to_vec()))
572        );
573
574        let dir_tree = store
575            .read_tree(
576                &DirRepoPath::from("dir/"),
577                &TreeId(dir_tree_id.as_bytes().to_vec()),
578            )
579            .unwrap();
580        let mut files = dir_tree.entries();
581        let normal_file = files.next().unwrap();
582        let symlink = files.next().unwrap();
583        assert_eq!(files.next(), None);
584        assert_eq!(normal_file.name(), "normal");
585        assert_eq!(
586            normal_file.value(),
587            &TreeValue::Normal {
588                id: FileId(blob1.as_bytes().to_vec()),
589                executable: false
590            }
591        );
592        assert_eq!(symlink.name(), "symlink");
593        assert_eq!(
594            symlink.value(),
595            &TreeValue::Symlink(SymlinkId(blob2.as_bytes().to_vec()))
596        );
597    }
598
599    #[test]
600    fn overlapping_git_commit_id() {
601        let temp_dir = tempfile::tempdir().unwrap();
602        let git_repo_path = temp_dir.path();
603        git2::Repository::init(git_repo_path.clone()).unwrap();
604        let store = GitStore::load(git_repo_path.to_owned());
605        let signature = Signature {
606            name: "Someone".to_string(),
607            email: "someone@example.com".to_string(),
608            timestamp: Timestamp {
609                timestamp: MillisSinceEpoch(0),
610                tz_offset: 0,
611            },
612        };
613        let commit1 = Commit {
614            parents: vec![],
615            predecessors: vec![],
616            root_tree: store.empty_tree_id().clone(),
617            change_id: ChangeId(vec![]),
618            description: "initial".to_string(),
619            author: signature.clone(),
620            committer: signature,
621            is_open: false,
622            is_pruned: false,
623        };
624        let commit_id1 = store.write_commit(&commit1).unwrap();
625        let mut commit2 = commit1;
626        commit2.predecessors.push(commit_id1.clone());
627        let expected_error_message = format!("note for '{}' exists already", commit_id1.hex());
628        match store.write_commit(&commit2) {
629            Ok(_) => {
630                panic!("expectedly successfully wrote two commits with the same git commit object")
631            }
632            Err(StoreError::Other(message)) if message.contains(&expected_error_message) => {}
633            Err(err) => panic!("unexpected error: {:?}", err),
634        };
635    }
636}