1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
#[macro_use]
extern crate slog;
use anyhow::{anyhow, Result};

mod commute;
mod owned;
mod stack;

use std::io::Write;

/// Options controlling a single invocation of [`run`].
pub struct Config<'a> {
    /// When `true`, `run` only logs the fixup commits it would have created
    /// ("would have committed") and leaves `HEAD` untouched.
    pub dry_run: bool,
    /// Forwarded verbatim to `stack::working_stack` when the commit stack is collected.
    // NOTE(review): presumably relaxes the safety limits applied to the stack -- confirm in `stack`.
    pub force: bool,
    /// Optional base forwarded to `stack::working_stack`.
    // NOTE(review): per the "use --base to increase the search range" warning in `run`,
    // this looks like the revision bounding the searched commits -- confirm in `stack`.
    pub base: Option<&'a str>,
    /// When `true` (and at least one modified file was considered), `run` finishes by
    /// invoking `git rebase --interactive --autosquash`.
    pub and_rebase: bool,
    /// Logger receiving every diagnostic emitted by `run`.
    pub logger: &'a slog::Logger,
}

pub fn run(config: &Config) -> Result<()> {
    let repo = git2::Repository::open_from_env()?;
    debug!(config.logger, "repository found"; "path" => repo.path().to_str());

    let stack = stack::working_stack(&repo, config.base, config.force, config.logger)?;
    if stack.is_empty() {
        crit!(config.logger, "No commits available to fix up, exiting");
        return Ok(());
    }

    let mut diff_options = Some({
        let mut ret = git2::DiffOptions::new();
        ret.context_lines(0)
            .id_abbrev(40)
            .ignore_filemode(true)
            .ignore_submodules(true);
        ret
    });

    let (stack, summary_counts): (Vec<_>, _) = {
        let mut diffs = Vec::with_capacity(stack.len());
        for commit in &stack {
            let diff = owned::Diff::new(
                &repo.diff_tree_to_tree(
                    if commit.parents().len() == 0 {
                        None
                    } else {
                        Some(commit.parent(0)?.tree()?)
                    }
                    .as_ref(),
                    Some(&commit.tree()?),
                    diff_options.as_mut(),
                )?,
            )?;
            trace!(config.logger, "parsed commit diff";
                   "commit" => commit.id().to_string(),
                   "diff" => format!("{:?}", diff),
            );
            diffs.push(diff);
        }

        let summary_counts = stack::summary_counts(&stack);
        (
            stack.into_iter().zip(diffs.into_iter()).collect(),
            summary_counts,
        )
    };

    let mut head_tree = repo.head()?.peel_to_tree()?;
    let index = owned::Diff::new(&repo.diff_tree_to_index(
        Some(&head_tree),
        None,
        diff_options.as_mut(),
    )?)?;
    trace!(config.logger, "parsed index";
           "index" => format!("{:?}", index),
    );

    let signature = repo
        .signature()
        .or_else(|_| git2::Signature::now("nobody", "nobody@example.com"))?;
    let mut head_commit = repo.head()?.peel_to_commit()?;

    let mut patches_considered = 0usize;
    'patch: for index_patch in index.iter() {
        let old_path = index_patch.new_path.as_slice();
        if index_patch.status != git2::Delta::Modified {
            debug!(config.logger, "skipped non-modified hunk";
                    "path" => String::from_utf8_lossy(old_path).into_owned(),
                    "status" => format!("{:?}", index_patch.status),
            );
            continue 'patch;
        }

        patches_considered += 1;

        let mut preceding_hunks_offset = 0isize;
        let mut applied_hunks_offset = 0isize;
        'hunk: for index_hunk in &index_patch.hunks {
            debug!(config.logger, "next hunk";
                   "header" => index_hunk.header(),
                   "path" => String::from_utf8_lossy(old_path).into_owned(),
            );

            // To properly handle files ("patches" in libgit2 lingo) with multiple hunks, we
            // need to find the updated line coordinates (`header`) of the current hunk in
            // two cases:
            // 1) As if it were the only hunk in the index. This only involves shifting the
            // "added" side *up* by the offset introduced by the preceding hunks:
            let isolated_hunk = index_hunk
                .clone()
                .shift_added_block(-preceding_hunks_offset);

            // 2) When applied on top of the previously committed hunks. This requires shifting
            // both the "added" and the "removed" sides of the previously isolated hunk *down*
            // by the offset of the committed hunks:
            let hunk_to_apply = isolated_hunk
                .clone()
                .shift_both_blocks(applied_hunks_offset);

            // The offset is the number of lines added minus the number of lines removed by a hunk:
            let hunk_offset = index_hunk.changed_offset();

            // To aid in understanding these arithmetics, here's an illustration.
            // There are two hunks in the original patch, each adding one line ("line2" and
            // "line5"). Assuming the first hunk (with offset = -1) was already proceesed
            // and applied, the table shows the three versions of the patch, with line numbers
            // on the <A>dded and <R>emoved sides for each:
            // |----------------|-----------|------------------|
            // |                |           | applied on top   |
            // | original patch | isolated  | of the preceding |
            // |----------------|-----------|------------------|
            // | <R> <A>        | <R> <A>   | <R> <A>          |
            // |----------------|-----------|------------------|
            // |  1   1  line1  |  1   1    |  1   1   line1   |
            // |  2      line2  |  2   2    |  2   2   line3   |
            // |  3   2  line3  |  3   3    |  3   3   line4   |
            // |  4   3  line4  |  4   4    |  4       line5   |
            // |  5      line5  |  5        |                  |
            // |----------------|-----------|------------------|
            // |       So the second hunk's `header` is:       |
            // |   -5,1 +3,0    | -5,1 +4,0 |    -4,1 +3,0     |
            // |----------------|-----------|------------------|

            debug!(config.logger, "";
                "to apply" => hunk_to_apply.header(),
                "to commute" => isolated_hunk.header(),
                "preceding hunks" => format!("{}/{}", applied_hunks_offset, preceding_hunks_offset),
            );

            preceding_hunks_offset += hunk_offset;

            // find the newest commit that the hunk cannot commute with
            let mut dest_commit = None;
            let mut commuted_old_path = old_path;
            let mut commuted_index_hunk = isolated_hunk;

            'commit: for &(ref commit, ref diff) in &stack {
                let c_logger = config.logger.new(o!(
                    "commit" => commit.id().to_string(),
                ));
                let next_patch = match diff.by_new(commuted_old_path) {
                    Some(patch) => patch,
                    // this commit doesn't touch the hunk's file, so
                    // they trivially commute, and the next commit
                    // should be considered
                    None => {
                        debug!(c_logger, "skipped commit with no path");
                        continue 'commit;
                    }
                };
                if next_patch.status == git2::Delta::Added {
                    debug!(c_logger, "found noncommutative commit by add");
                    dest_commit = Some(commit);
                    break 'commit;
                }
                if commuted_old_path != next_patch.old_path.as_slice() {
                    debug!(c_logger, "changed commute path";
                           "path" => String::from_utf8_lossy(&next_patch.old_path).into_owned(),
                    );
                    commuted_old_path = next_patch.old_path.as_slice();
                }
                commuted_index_hunk = match commute::commute_diff_before(
                    &commuted_index_hunk,
                    &next_patch.hunks,
                ) {
                    Some(hunk) => {
                        debug!(c_logger, "commuted hunk with commit";
                               "offset" => (hunk.added.start as i64) - (commuted_index_hunk.added.start as i64),
                        );
                        hunk
                    }
                    // this commit contains a hunk that cannot
                    // commute with the hunk being absorbed
                    None => {
                        debug!(c_logger, "found noncommutative commit by conflict");
                        dest_commit = Some(commit);
                        break 'commit;
                    }
                };
            }
            let dest_commit = match dest_commit {
                Some(commit) => commit,
                // the hunk commutes with every commit in the stack,
                // so there is no commit to absorb it into
                None => {
                    warn!(
                        config.logger,
                        "Could not find a commit to fix up, use \
                         --base to increase the search range."
                    );
                    continue 'hunk;
                }
            };

            // TODO: the git2 api only supports utf8 commit messages,
            // so it's okay to use strings instead of bytes here
            // https://docs.rs/git2/0.7.5/src/git2/repo.rs.html#998
            // https://libgit2.org/libgit2/#HEAD/group/commit/git_commit_create
            let dest_commit_id = dest_commit.id().to_string();
            let dest_commit_locator = dest_commit
                .summary()
                .filter(|&msg| summary_counts[msg] == 1)
                .unwrap_or(&dest_commit_id);
            if !config.dry_run {
                head_tree =
                    apply_hunk_to_tree(&repo, &head_tree, &hunk_to_apply, &index_patch.old_path)?;
                head_commit = repo.find_commit(repo.commit(
                    Some("HEAD"),
                    &signature,
                    &signature,
                    &format!("fixup! {}\n", dest_commit_locator),
                    &head_tree,
                    &[&head_commit],
                )?)?;
                info!(config.logger, "committed";
                      "commit" => head_commit.id().to_string(),
                      "header" => hunk_to_apply.header(),
                );
            } else {
                info!(config.logger, "would have committed";
                      "fixup" => dest_commit_locator,
                      "header" => hunk_to_apply.header(),
                );
            }
            applied_hunks_offset += hunk_offset;
        }
    }

    if patches_considered == 0 {
        warn!(
            config.logger,
            "No additions staged, try adding something to the index."
        );
    } else if config.and_rebase {
        use std::process::Command;
        // unwrap() is safe here, as we exit early if the stack is empty
        let last_commit_in_stack = &stack.last().unwrap().0;
        // The stack isn't supposed to have any merge commits, per the check in working_stack()
        let number_of_parents = last_commit_in_stack.parents().len();
        assert!(number_of_parents <= 1);

        let mut command = Command::new("git");
        command.args(&["rebase", "--interactive", "--autosquash"]);

        if number_of_parents == 0 {
            command.arg("--root");
        } else {
            // Use a range that is guaranteed to include all the commits we might have
            // committed "fixup!" commits for.
            let base_commit_sha = last_commit_in_stack.parent(0)?.id().to_string();
            command.arg(&base_commit_sha);
        }

        // Don't check that we have successfully absorbed everything, nor git's
        // exit code -- as git will print helpful messages on its own.
        command.status().expect("could not run git rebase");
    }

    Ok(())
}

/// Builds a copy of `base` in which `hunk` has been applied to the blob found at `path`.
///
/// `path` is a `/`-separated byte path relative to `base`. A leading directory component
/// is resolved first and the function recurses into that subtree with the remaining path;
/// the rewritten subtree (or blob) is then inserted back under the original name and file
/// mode, and the resulting tree is written and looked up in `repo`.
///
/// # Errors
///
/// Fails when a path component has no entry in its tree, or when any of the underlying
/// git2 or blob-writing operations fail.
fn apply_hunk_to_tree<'repo>(
    repo: &'repo git2::Repository,
    base: &git2::Tree,
    hunk: &owned::Hunk,
    path: &[u8],
) -> Result<git2::Tree<'repo>> {
    let mut builder = repo.treebuilder(Some(base))?;

    // Work out which entry of this tree level has to be replaced, and by what.
    let (entry_name, entry_id, entry_mode) = match path.iter().position(|&byte| byte == b'/') {
        // `path` still starts with a directory: rewrite that subtree first
        Some(separator) => {
            let directory = &path[..separator];
            let remainder = &path[separator + 1..];

            // the entry lookup is kept in its own scope; only the subtree
            // and its mode are needed afterwards
            let (child_tree, child_mode) = {
                let entry = builder
                    .get(directory)?
                    .ok_or_else(|| anyhow!("couldn't find tree entry in tree for path"))?;
                (repo.find_tree(entry.id())?, entry.filemode())
            };
            // TODO: loop instead of recursing to avoid potential stack overflow
            let rewritten = apply_hunk_to_tree(repo, &child_tree, hunk, remainder)?;
            (directory, rewritten.id(), child_mode)
        }
        // `path` names a blob directly inside this tree
        None => {
            let (blob, blob_mode) = {
                let entry = builder
                    .get(path)?
                    .ok_or_else(|| anyhow!("couldn't find blob entry in tree for path"))?;
                (repo.find_blob(entry.id())?, entry.filemode())
            };

            // TODO: convert path to OsStr and pass it during blob_writer
            // creation, to get gitattributes handling (note that converting
            // &[u8] to &std::path::Path is only possible on unixy platforms)
            let mut writer = repo.blob_writer(None)?;
            let (old_start, _, _, _) = hunk.anchors();

            // copy the old lines that sit above the hunk unchanged
            let (above, from_hunk) = split_lines_after(blob.content(), old_start);
            writer.write_all(above)?;
            // emit the added side of the hunk
            for line in hunk.added.lines.iter() {
                writer.write_all(line)?;
            }
            // skip the old lines the hunk removed, then copy whatever is left
            let (_, below) = split_lines_after(from_hunk, hunk.removed.lines.len());
            writer.write_all(below)?;

            (path, writer.commit()?, blob_mode)
        }
    };

    builder.insert(entry_name, entry_id, entry_mode)?;
    Ok(repo.find_tree(builder.write()?)?)
}

/// Splits `content` right after its `n`-th line.
///
/// The first slice holds the first `n` lines, including the `\n` that ends the `n`-th
/// one; the second slice holds everything that follows. With `n == 0` the first slice is
/// empty, and when `content` contains fewer than `n` newlines the second slice is empty.
fn split_lines_after(content: &[u8], n: usize) -> (&[u8], &[u8]) {
    let boundary = match n.checked_sub(1) {
        // no lines requested: everything belongs to the second slice
        None => 0,
        // cut just past the '\n' ending the `n`-th line, or at the very end
        // of `content` when it has fewer than `n` newlines
        Some(newlines_to_skip) => memchr::Memchr::new(b'\n', content)
            .nth(newlines_to_skip)
            .map_or(content.len(), |newline_at| newline_at + 1),
    };
    content.split_at(boundary)
}