Skip to main content

open_kioku_git/
lib.rs

1mod ownership;
2mod reviewers;
3
4use chrono::{DateTime, Utc};
5use open_kioku_core::{
6    GitChangeKind, GitCommitId, GitCommitRecord, GitFileTouch, HistoryRecordId, LineRange, Owner,
7};
8use open_kioku_errors::{OkError, Result};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs;
12use std::path::{Path, PathBuf};
13use std::process::Command;
14
/// Byte (`0x1e`) that the `git log` formats used in this file emit in front of
/// every commit; the history and patch parsers split raw output on it.
const COMMIT_RECORD_SEPARATOR: u8 = 0x1e;
/// `git log` pretty-format used for history scans: a `0x1e` marker followed by
/// ten NUL-terminated fields — commit hash, parent hashes, author name, author
/// email, author date, committer name, committer email, committer date,
/// subject, and raw body. The parser expects exactly this field order.
const GIT_COMMIT_FORMAT: &str =
    "--format=%x1e%H%x00%P%x00%an%x00%ae%x00%aI%x00%cn%x00%ce%x00%cI%x00%s%x00%B%x00";
18
19pub use ownership::{ownership_for_path, OwnershipInput};
20pub use reviewers::{suggest_reviewers, ReviewerSuggestionInput};
21
/// Commits and per-file touches produced by a `git log` history scan.
#[derive(Debug, Clone, PartialEq)]
pub struct CommitHistory {
    /// One record per parsed commit, in the order the commits were parsed.
    pub commits: Vec<GitCommitRecord>,
    /// File touches of every commit, appended commit by commit.
    pub file_touches: Vec<GitFileTouch>,
}

impl CommitHistory {
    /// Returns a history with no commits and no file touches.
    pub fn empty() -> Self {
        Self {
            commits: Vec::new(),
            file_touches: Vec::new(),
        }
    }
}
36
/// Directed co-change statistic: how often `cochanged_path` was touched in the
/// same commit as `path`.
#[derive(Debug, Clone, PartialEq)]
pub struct CochangeRecord {
    /// File the record is keyed on.
    pub path: PathBuf,
    /// File that changed together with `path`.
    pub cochanged_path: PathBuf,
    /// Number of scanned commits that touched both files.
    pub commit_count: usize,
    /// Sum of per-commit weights; commits earlier in the scanned history
    /// contribute more than later entries.
    pub recency_weight: f32,
    /// Whether `cochanged_path` looks like a test file.
    pub test_corun: bool,
    /// Ids of at most five commits that touched both files.
    pub commits: Vec<String>,
}
46
/// The files changed by a single commit, as parsed from `git log --patch`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitPatch {
    /// Commit the patch belongs to.
    pub commit_id: GitCommitId,
    /// One entry per file of the patch for which a path could be determined.
    pub files: Vec<FilePatch>,
}
52
/// Changed line ranges of one file within a commit patch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Path after the change (from the `+++` header or `rename to` line).
    pub path: PathBuf,
    /// Path before a rename (from the `rename from` line), if any.
    pub previous_path: Option<PathBuf>,
    /// New-side line ranges of the hunks; hunks that add no lines are omitted.
    pub line_ranges: Vec<LineRange>,
}
59
60#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
61pub struct DiffFile {
62    pub old_path: Option<PathBuf>,
63    pub new_path: Option<PathBuf>,
64    pub status: GitChangeKind,
65    pub rename_score: Option<u8>,
66    pub hunks: Vec<DiffHunk>,
67}
68
69impl DiffFile {
70    pub fn changed_line_ranges(&self) -> Vec<LineRange> {
71        self.hunks
72            .iter()
73            .filter_map(|hunk| hunk.new_range.clone())
74            .collect()
75    }
76}
77
/// Line ranges covered by one diff hunk header (`@@ -old +new @@`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Old-side range; `None` when the hunk header's old line count is zero.
    pub old_range: Option<LineRange>,
    /// New-side range; `None` when the hunk header's new line count is zero.
    pub new_range: Option<LineRange>,
}
83
84pub fn discover_root(start: impl AsRef<Path>) -> Result<PathBuf> {
85    let mut current = start.as_ref().canonicalize()?;
86    loop {
87        if current.join(".git").exists() || current.join("ok.toml").exists() {
88            return Ok(current);
89        }
90        if !current.pop() {
91            return Ok(start.as_ref().canonicalize()?);
92        }
93    }
94}
95
/// Reads the checked-out branch name from `<root>/.git/HEAD`.
///
/// Returns `None` when the file cannot be read or does not start with
/// `ref: refs/heads/` (for example, a detached HEAD).
pub fn branch(root: impl AsRef<Path>) -> Option<String> {
    let head_file = root.as_ref().join(".git/HEAD");
    let contents = fs::read_to_string(head_file).ok()?;
    contents
        .strip_prefix("ref: refs/heads/")
        .map(|name| name.trim().to_string())
}
103
/// Resolves the commit id that `<root>/.git/HEAD` points at.
///
/// * If HEAD does not start with `ref: `, its trimmed content is returned
///   as-is (detached HEAD).
/// * Otherwise the referenced file under `.git/` is read. If that loose ref
///   file is missing, the ref is looked up in `.git/packed-refs`, where git
///   stores refs after packing them.
///
/// Returns `None` when HEAD cannot be read or the reference resolves in
/// neither place.
pub fn commit(root: impl AsRef<Path>) -> Option<String> {
    let git_dir = root.as_ref().join(".git");
    let head = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    if !head.starts_with("ref: ") {
        return Some(head.trim().to_string());
    }
    let reference = head.trim().strip_prefix("ref: ")?;
    if let Ok(value) = fs::read_to_string(git_dir.join(reference)) {
        return Some(value.trim().to_string());
    }
    // Packed refs are `<id> <refname>` lines; the `#` header line and `^`
    // peeled-tag lines never carry a matching ref name, so they fall through.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed.lines().find_map(|line| {
        let (id, name) = line.split_once(' ')?;
        (name.trim_end() == reference).then(|| id.to_string())
    })
}
114
115pub fn require_repo(root: impl AsRef<Path>) -> Result<PathBuf> {
116    let root = discover_root(root)?;
117    if !root.exists() {
118        return Err(OkError::Repository(format!(
119            "repository root does not exist: {}",
120            root.display()
121        )));
122    }
123    Ok(root)
124}
125
126pub fn cochange_records(
127    root: impl AsRef<Path>,
128    max_commits: usize,
129    max_files_per_commit: usize,
130) -> Result<Vec<CochangeRecord>> {
131    let history = commit_history(root, max_commits)?;
132    Ok(cochange_records_from_history(
133        &history,
134        max_files_per_commit,
135    ))
136}
137
138pub fn commit_history(root: impl AsRef<Path>, max_commits: usize) -> Result<CommitHistory> {
139    let root = root.as_ref();
140    if !root.join(".git").exists() || max_commits == 0 {
141        return Ok(CommitHistory::empty());
142    }
143    let head = Command::new("git")
144        .arg("-C")
145        .arg(root)
146        .args(["rev-parse", "--verify", "HEAD"])
147        .output()
148        .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
149    if !head.status.success() {
150        return Ok(CommitHistory::empty());
151    }
152    let output = Command::new("git")
153        .arg("-C")
154        .arg(root)
155        .arg("log")
156        .arg(format!("--max-count={max_commits}"))
157        .args([
158            "--no-show-signature",
159            "--no-color",
160            "--no-decorate",
161            "--encoding=UTF-8",
162            "--date=iso-strict",
163            "--find-renames",
164            GIT_COMMIT_FORMAT,
165            "--name-status",
166            "-z",
167        ])
168        .output()
169        .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
170    if !output.status.success() {
171        let stderr = String::from_utf8_lossy(&output.stderr);
172        return Err(OkError::Repository(format!(
173            "git history scan failed: {}",
174            stderr.trim()
175        )));
176    }
177    parse_commit_history(&output.stdout)
178}
179
180pub fn commit_patches(root: impl AsRef<Path>, max_commits: usize) -> Result<Vec<CommitPatch>> {
181    let root = root.as_ref();
182    if !root.join(".git").exists() || max_commits == 0 {
183        return Ok(Vec::new());
184    }
185    let head = Command::new("git")
186        .arg("-C")
187        .arg(root)
188        .args(["rev-parse", "--verify", "HEAD"])
189        .output()
190        .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
191    if !head.status.success() {
192        return Ok(Vec::new());
193    }
194    let output = Command::new("git")
195        .arg("-C")
196        .arg(root)
197        .args(["-c", "core.quotePath=true"])
198        .arg("log")
199        .arg(format!("--max-count={max_commits}"))
200        .args([
201            "--no-show-signature",
202            "--no-color",
203            "--no-decorate",
204            "--encoding=UTF-8",
205            "--find-renames",
206            "--format=%x1e%H%x00",
207            "--patch",
208            "--unified=0",
209            "--no-ext-diff",
210            "--no-textconv",
211        ])
212        .output()
213        .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
214    if !output.status.success() {
215        let stderr = String::from_utf8_lossy(&output.stderr);
216        return Err(OkError::Repository(format!(
217            "git patch scan failed: {}",
218            stderr.trim()
219        )));
220    }
221    parse_commit_patches(&output.stdout)
222}
223
/// Lists changed files using `git diff --name-status --find-renames` with no
/// additional arguments.
///
/// Returns an empty list when `root` has no `.git` entry; fails when git
/// cannot be run, exits unsuccessfully, or prints output that cannot be parsed.
pub fn diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &[])
}
227
/// Lists changed files using `git diff <since> --name-status --find-renames`.
///
/// `since` is passed to git verbatim as the first argument after `diff`.
/// Returns an empty list when `root` has no `.git` entry.
pub fn diff_name_status_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &[since])
}
231
/// Lists changed files using `git diff --cached --name-status --find-renames`.
///
/// Returns an empty list when `root` has no `.git` entry.
pub fn cached_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &["--cached"])
}
235
/// Lists changed files using `git diff HEAD --name-status --find-renames`.
///
/// Returns an empty list when `root` has no `.git` entry.
pub fn head_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &["HEAD"])
}
239
/// Returns per-file hunks parsed from `git diff --unified=0` run with no
/// additional arguments.
///
/// Returns an empty list when `root` has no `.git` entry; fails when git
/// cannot be run, exits unsuccessfully, or prints output that cannot be parsed.
pub fn diff_unified_zero(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_unified_zero(root, &[])
}
243
/// Returns per-file hunks parsed from `git diff <since> --unified=0`.
///
/// `since` is passed to git verbatim as the first argument after `diff`.
/// Returns an empty list when `root` has no `.git` entry.
pub fn diff_unified_zero_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
    run_diff_unified_zero(root, &[since])
}
247
248fn run_diff_unified_zero(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
249    let root = root.as_ref();
250    if !root.join(".git").exists() {
251        return Ok(Vec::new());
252    }
253    let output = Command::new("git")
254        .arg("-C")
255        .arg(root)
256        .args(["-c", "core.quotePath=true"])
257        .arg("diff")
258        .args(extra_args)
259        .args(["--unified=0", "--no-ext-diff", "--no-textconv"])
260        .output()
261        .map_err(|err| OkError::Repository(format!("git diff failed: {err}")))?;
262    if !output.status.success() {
263        let stderr = String::from_utf8_lossy(&output.stderr);
264        return Err(OkError::Repository(format!(
265            "git diff failed: {}",
266            stderr.trim()
267        )));
268    }
269    parse_unified_zero_diff(&git_text(&output.stdout, "diff output")?)
270}
271
272fn run_diff_name_status(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
273    let root = root.as_ref();
274    if !root.join(".git").exists() {
275        return Ok(Vec::new());
276    }
277    let output = Command::new("git")
278        .arg("-C")
279        .arg(root)
280        .arg("diff")
281        .args(extra_args)
282        .args(["--name-status", "--find-renames"])
283        .output()
284        .map_err(|err| OkError::Repository(format!("git diff --name-status failed: {err}")))?;
285    if !output.status.success() {
286        let stderr = String::from_utf8_lossy(&output.stderr);
287        return Err(OkError::Repository(format!(
288            "git diff --name-status failed: {}",
289            stderr.trim()
290        )));
291    }
292    parse_diff_name_status(&git_text(&output.stdout, "diff name-status output")?)
293}
294
295pub fn cochange_records_from_history(
296    history: &CommitHistory,
297    max_files_per_commit: usize,
298) -> Vec<CochangeRecord> {
299    if max_files_per_commit < 2 {
300        return Vec::new();
301    }
302    let mut files_by_commit = HashMap::<&str, Vec<PathBuf>>::new();
303    for touch in &history.file_touches {
304        files_by_commit
305            .entry(touch.commit_id.0.as_str())
306            .or_default()
307            .push(touch.path.clone());
308    }
309    let mut pairs: HashMap<(PathBuf, PathBuf), CochangeRecord> = HashMap::new();
310    for (idx, commit) in history.commits.iter().enumerate() {
311        let mut files = files_by_commit
312            .remove(commit.id.0.as_str())
313            .unwrap_or_default();
314        files.sort();
315        files.dedup();
316        if files.len() < 2 || files.len() > max_files_per_commit {
317            continue;
318        }
319        let recency_weight = 1.0 / (1.0 + idx as f32 / 25.0);
320        for left in &files {
321            for right in &files {
322                if left == right {
323                    continue;
324                }
325                let key = (left.clone(), right.clone());
326                let entry = pairs.entry(key).or_insert_with(|| CochangeRecord {
327                    path: left.clone(),
328                    cochanged_path: right.clone(),
329                    commit_count: 0,
330                    recency_weight: 0.0,
331                    test_corun: is_test_path(right),
332                    commits: Vec::new(),
333                });
334                entry.commit_count += 1;
335                entry.recency_weight += recency_weight;
336                entry.test_corun |= is_test_path(right);
337                if entry.commits.len() < 5 {
338                    entry.commits.push(commit.id.0.clone());
339                }
340            }
341        }
342    }
343    let mut records = pairs.into_values().collect::<Vec<_>>();
344    records.sort_by(|a, b| {
345        b.recency_weight
346            .partial_cmp(&a.recency_weight)
347            .unwrap_or(std::cmp::Ordering::Equal)
348            .then_with(|| b.commit_count.cmp(&a.commit_count))
349            .then_with(|| a.path.cmp(&b.path))
350            .then_with(|| a.cochanged_path.cmp(&b.cochanged_path))
351    });
352    records
353}
354
355fn parse_commit_history(raw: &[u8]) -> Result<CommitHistory> {
356    let mut history = CommitHistory::empty();
357    for record in raw
358        .split(|byte| *byte == COMMIT_RECORD_SEPARATOR)
359        .filter(|record| !record.is_empty())
360    {
361        let fields = record.splitn(11, |byte| *byte == 0).collect::<Vec<_>>();
362        if fields.len() != 11 {
363            return Err(OkError::Repository(format!(
364                "git history record has {} fields; expected commit metadata and file statuses",
365                fields.len()
366            )));
367        }
368        let sha = git_text(fields[0], "commit id")?;
369        let parent_ids = git_text(fields[1], "parent commit ids")?
370            .split_whitespace()
371            .map(|id| GitCommitId::new(id.to_string()))
372            .collect::<Vec<_>>();
373        let author = owner(
374            git_text(fields[2], "author name")?,
375            git_text(fields[3], "author email")?,
376            "author",
377        )?;
378        let authored_at = git_timestamp(fields[4], "authored timestamp")?;
379        let committer = owner(
380            git_text(fields[5], "committer name")?,
381            git_text(fields[6], "committer email")?,
382            "committer",
383        )?;
384        let committed_at = git_timestamp(fields[7], "committed timestamp")?;
385        let mut summary = git_text(fields[8], "commit summary")?;
386        let message = git_text(fields[9], "commit message")?
387            .trim_end_matches(['\r', '\n'])
388            .to_string();
389        if summary.trim().is_empty() {
390            summary = message.lines().next().unwrap_or_default().to_string();
391        }
392        let commit_id = GitCommitId::new(sha);
393        let mut touches = parse_file_touches(fields[10], &commit_id, committed_at)?;
394        let file_count = touches.len();
395        history.commits.push(GitCommitRecord {
396            id: commit_id,
397            parent_ids,
398            author,
399            committer: Some(committer),
400            authored_at,
401            committed_at,
402            summary,
403            message,
404            file_count,
405        });
406        history.file_touches.append(&mut touches);
407    }
408    Ok(history)
409}
410
411fn parse_commit_patches(raw: &[u8]) -> Result<Vec<CommitPatch>> {
412    let mut commits = Vec::new();
413    let starts = patch_record_starts(raw);
414    if starts.is_empty() && !raw.is_empty() {
415        return Err(OkError::Repository(
416            "git patch output is missing a commit record".into(),
417        ));
418    }
419    for (index, start) in starts.iter().enumerate() {
420        let end = starts.get(index + 1).copied().unwrap_or(raw.len());
421        let record = &raw[start + 1..end];
422        let Some(metadata_end) = record.iter().position(|byte| *byte == 0) else {
423            return Err(OkError::Repository(
424                "git patch record is missing its commit delimiter".into(),
425            ));
426        };
427        let commit_id = GitCommitId::new(git_text(&record[..metadata_end], "commit id")?);
428        let patch = git_text(&record[metadata_end + 1..], "patch")?;
429        commits.push(CommitPatch {
430            commit_id,
431            files: parse_file_patches(&patch)?,
432        });
433    }
434    Ok(commits)
435}
436
437fn patch_record_starts(raw: &[u8]) -> Vec<usize> {
438    raw.iter()
439        .enumerate()
440        .filter_map(|(index, byte)| {
441            if *byte != COMMIT_RECORD_SEPARATOR {
442                return None;
443            }
444            let commit_start = index + 1;
445            [40, 64].into_iter().find_map(|length| {
446                let commit_end = commit_start + length;
447                (raw.get(commit_end) == Some(&0)
448                    && raw
449                        .get(commit_start..commit_end)
450                        .is_some_and(|commit| commit.iter().all(u8::is_ascii_hexdigit)))
451                .then_some(index)
452            })
453        })
454        .collect()
455}
456
457fn parse_file_patches(patch: &str) -> Result<Vec<FilePatch>> {
458    #[derive(Default)]
459    struct PendingPatch {
460        path: Option<PathBuf>,
461        previous_path: Option<PathBuf>,
462        line_ranges: Vec<LineRange>,
463    }
464
465    fn finish(patches: &mut Vec<FilePatch>, pending: &mut PendingPatch) {
466        if let Some(path) = pending.path.take() {
467            patches.push(FilePatch {
468                path,
469                previous_path: pending.previous_path.take(),
470                line_ranges: std::mem::take(&mut pending.line_ranges),
471            });
472        } else {
473            pending.previous_path = None;
474            pending.line_ranges.clear();
475        }
476    }
477
478    let mut patches = Vec::new();
479    let mut pending = PendingPatch::default();
480    for line in patch.lines() {
481        if line.starts_with("diff --git ") {
482            finish(&mut patches, &mut pending);
483        } else if let Some(value) = line.strip_prefix("rename from ") {
484            pending.previous_path = Some(parse_patch_path(value, None)?);
485        } else if let Some(value) = line.strip_prefix("rename to ") {
486            pending.path = Some(parse_patch_path(value, None)?);
487        } else if let Some(value) = line.strip_prefix("+++ ") {
488            if value != "/dev/null" {
489                pending.path = Some(parse_patch_path(value, Some("b/"))?);
490            }
491        } else if line.starts_with("@@ ") {
492            if let Some(range) = parse_new_hunk_range(line)? {
493                pending.line_ranges.push(range);
494            }
495        }
496    }
497    finish(&mut patches, &mut pending);
498    Ok(patches)
499}
500
501fn parse_diff_name_status(raw: &str) -> Result<Vec<DiffFile>> {
502    raw.lines()
503        .filter(|line| !line.trim().is_empty())
504        .map(|line| {
505            let mut fields = line.split('\t').collect::<Vec<_>>();
506            if fields.len() < 2 {
507                fields = line.split_whitespace().collect();
508            }
509            let status = fields.first().copied().unwrap_or_default();
510            if fields.len() < 2 {
511                return Err(OkError::Repository(format!(
512                    "git diff name-status entry is missing a path: `{line}`"
513                )));
514            }
515            let kind = change_kind(status.as_bytes());
516            let rename_score = status
517                .strip_prefix('R')
518                .or_else(|| status.strip_prefix('C'))
519                .and_then(|score| score.parse::<u8>().ok());
520            match kind {
521                GitChangeKind::Renamed | GitChangeKind::Copied => {
522                    if fields.len() < 3 {
523                        return Err(OkError::Repository(format!(
524                            "git diff name-status rename is missing paths: `{line}`"
525                        )));
526                    }
527                    Ok(DiffFile {
528                        old_path: Some(parse_patch_path(fields[1], None)?),
529                        new_path: Some(parse_patch_path(fields[2], None)?),
530                        status: kind,
531                        rename_score,
532                        hunks: Vec::new(),
533                    })
534                }
535                GitChangeKind::Deleted => Ok(DiffFile {
536                    old_path: Some(parse_patch_path(fields[1], None)?),
537                    new_path: None,
538                    status: kind,
539                    rename_score,
540                    hunks: Vec::new(),
541                }),
542                _ => Ok(DiffFile {
543                    old_path: None,
544                    new_path: Some(parse_patch_path(fields[1], None)?),
545                    status: kind,
546                    rename_score,
547                    hunks: Vec::new(),
548                }),
549            }
550        })
551        .collect()
552}
553
554fn parse_unified_zero_diff(patch: &str) -> Result<Vec<DiffFile>> {
555    #[derive(Default)]
556    struct PendingDiff {
557        old_path: Option<PathBuf>,
558        new_path: Option<PathBuf>,
559        status: Option<GitChangeKind>,
560        rename_score: Option<u8>,
561        hunks: Vec<DiffHunk>,
562    }
563
564    fn finish(files: &mut Vec<DiffFile>, pending: &mut PendingDiff) {
565        if pending.old_path.is_none() && pending.new_path.is_none() {
566            pending.hunks.clear();
567            pending.status = None;
568            pending.rename_score = None;
569            return;
570        }
571        let status = pending.status.unwrap_or_else(|| {
572            if pending.old_path.is_none() {
573                GitChangeKind::Added
574            } else if pending.new_path.is_none() {
575                GitChangeKind::Deleted
576            } else if pending.old_path != pending.new_path {
577                GitChangeKind::Renamed
578            } else {
579                GitChangeKind::Modified
580            }
581        });
582        files.push(DiffFile {
583            old_path: pending.old_path.take(),
584            new_path: pending.new_path.take(),
585            status,
586            rename_score: pending.rename_score.take(),
587            hunks: std::mem::take(&mut pending.hunks),
588        });
589    }
590
591    let mut files = Vec::new();
592    let mut pending = PendingDiff::default();
593    for line in patch.lines() {
594        if line.starts_with("diff --git ") {
595            finish(&mut files, &mut pending);
596        } else if line.starts_with("new file mode ") {
597            pending.status = Some(GitChangeKind::Added);
598        } else if line.starts_with("deleted file mode ") {
599            pending.status = Some(GitChangeKind::Deleted);
600        } else if let Some(score) = line.strip_prefix("similarity index ") {
601            pending.rename_score = score.trim_end_matches('%').parse::<u8>().ok();
602        } else if let Some(value) = line.strip_prefix("rename from ") {
603            pending.old_path = Some(parse_patch_path(value, None)?);
604            pending.status = Some(GitChangeKind::Renamed);
605        } else if let Some(value) = line.strip_prefix("rename to ") {
606            pending.new_path = Some(parse_patch_path(value, None)?);
607            pending.status = Some(GitChangeKind::Renamed);
608        } else if let Some(value) = line.strip_prefix("--- ") {
609            if value != "/dev/null" {
610                pending.old_path = Some(parse_patch_path(value, Some("a/"))?);
611            }
612        } else if let Some(value) = line.strip_prefix("+++ ") {
613            if value != "/dev/null" {
614                pending.new_path = Some(parse_patch_path(value, Some("b/"))?);
615            }
616        } else if line.starts_with("@@ ") {
617            pending.hunks.push(parse_diff_hunk(line)?);
618        }
619    }
620    finish(&mut files, &mut pending);
621    Ok(files)
622}
623
624fn parse_diff_hunk(header: &str) -> Result<DiffHunk> {
625    let old = header
626        .split_whitespace()
627        .find(|part| part.starts_with('-'))
628        .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
629    let new = header
630        .split_whitespace()
631        .find(|part| part.starts_with('+'))
632        .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
633    Ok(DiffHunk {
634        old_range: parse_hunk_range(old.trim_start_matches('-'))?,
635        new_range: parse_hunk_range(new.trim_start_matches('+'))?,
636    })
637}
638
639fn parse_hunk_range(value: &str) -> Result<Option<LineRange>> {
640    let (start, count) = value.split_once(',').unwrap_or((value, "1"));
641    let start = start.parse::<u32>().map_err(|err| {
642        OkError::Repository(format!("git diff hunk start `{start}` is invalid: {err}"))
643    })?;
644    let count = count.parse::<u32>().map_err(|err| {
645        OkError::Repository(format!("git diff hunk count `{count}` is invalid: {err}"))
646    })?;
647    if count == 0 {
648        return Ok(None);
649    }
650    Ok(Some(LineRange {
651        start,
652        end: start.saturating_add(count - 1),
653    }))
654}
655
656fn parse_new_hunk_range(header: &str) -> Result<Option<LineRange>> {
657    let marker = header
658        .split_whitespace()
659        .find(|part| part.starts_with('+'))
660        .ok_or_else(|| OkError::Repository(format!("git patch hunk is malformed: `{header}`")))?;
661    let value = marker.trim_start_matches('+');
662    let (start, count) = value.split_once(',').unwrap_or((value, "1"));
663    let start = start.parse::<u32>().map_err(|err| {
664        OkError::Repository(format!("git patch hunk start `{start}` is invalid: {err}"))
665    })?;
666    let count = count.parse::<u32>().map_err(|err| {
667        OkError::Repository(format!("git patch hunk count `{count}` is invalid: {err}"))
668    })?;
669    if count == 0 {
670        return Ok(None);
671    }
672    Ok(Some(LineRange {
673        start,
674        end: start.saturating_add(count - 1),
675    }))
676}
677
678fn parse_patch_path(value: &str, prefix: Option<&str>) -> Result<PathBuf> {
679    let decoded = if value.starts_with('"') {
680        decode_git_quoted_path(value)?
681    } else {
682        value.to_string()
683    };
684    let decoded = prefix
685        .and_then(|prefix| decoded.strip_prefix(prefix))
686        .unwrap_or(&decoded);
687    Ok(PathBuf::from(decoded))
688}
689
690fn decode_git_quoted_path(value: &str) -> Result<String> {
691    let Some(inner) = value
692        .strip_prefix('"')
693        .and_then(|value| value.strip_suffix('"'))
694    else {
695        return Err(OkError::Repository(format!(
696            "git patch path has invalid quoting: `{value}`"
697        )));
698    };
699    let mut bytes = Vec::with_capacity(inner.len());
700    let mut chars = inner.as_bytes().iter().copied().peekable();
701    while let Some(byte) = chars.next() {
702        if byte != b'\\' {
703            bytes.push(byte);
704            continue;
705        }
706        let escaped = chars.next().ok_or_else(|| {
707            OkError::Repository(format!("git patch path has a trailing escape: `{value}`"))
708        })?;
709        match escaped {
710            b'\\' | b'"' => bytes.push(escaped),
711            b'a' => bytes.push(0x07),
712            b'b' => bytes.push(0x08),
713            b't' => bytes.push(b'\t'),
714            b'n' => bytes.push(b'\n'),
715            b'v' => bytes.push(0x0b),
716            b'f' => bytes.push(0x0c),
717            b'r' => bytes.push(b'\r'),
718            b'0'..=b'7' => {
719                let mut octal = vec![escaped];
720                for _ in 0..2 {
721                    if chars.peek().is_some_and(|byte| matches!(byte, b'0'..=b'7')) {
722                        octal.push(chars.next().expect("peeked octal byte"));
723                    } else {
724                        break;
725                    }
726                }
727                let decoded = std::str::from_utf8(&octal)
728                    .ok()
729                    .and_then(|value| u8::from_str_radix(value, 8).ok())
730                    .ok_or_else(|| {
731                        OkError::Repository("git patch path contains invalid octal escape".into())
732                    })?;
733                bytes.push(decoded);
734            }
735            other => bytes.push(other),
736        }
737    }
738    String::from_utf8(bytes)
739        .map_err(|err| OkError::Repository(format!("git patch path is not UTF-8: {err}")))
740}
741
742fn parse_file_touches(
743    raw: &[u8],
744    commit_id: &GitCommitId,
745    touched_at: DateTime<Utc>,
746) -> Result<Vec<GitFileTouch>> {
747    let mut tokens = raw.split(|byte| *byte == 0);
748    let mut touches = Vec::new();
749    while let Some(status) = next_status(&mut tokens) {
750        let change_kind = change_kind(status);
751        let rename_or_copy = matches!(change_kind, GitChangeKind::Renamed | GitChangeKind::Copied);
752        let first_path = next_path(&mut tokens, commit_id, status)?;
753        let (path, previous_path) = if rename_or_copy {
754            let current_path = next_path(&mut tokens, commit_id, status)?;
755            (current_path, Some(first_path))
756        } else {
757            (first_path, None)
758        };
759        let id = HistoryRecordId::new(format!("file-touch:{}:{}", commit_id.0, touches.len()));
760        touches.push(GitFileTouch {
761            id,
762            commit_id: commit_id.clone(),
763            path,
764            previous_path,
765            change_kind,
766            additions: None,
767            deletions: None,
768            touched_at,
769        });
770    }
771    Ok(touches)
772}
773
774fn next_status<'a>(tokens: &mut impl Iterator<Item = &'a [u8]>) -> Option<&'a [u8]> {
775    tokens
776        .map(trim_status_prefix)
777        .find(|token| !token.is_empty())
778}
779
780fn next_path<'a>(
781    tokens: &mut impl Iterator<Item = &'a [u8]>,
782    commit_id: &GitCommitId,
783    status: &[u8],
784) -> Result<PathBuf> {
785    let path = tokens.find(|token| !token.is_empty()).ok_or_else(|| {
786        OkError::Repository(format!(
787            "git history record for commit `{commit_id}` is missing a path after status `{}`",
788            String::from_utf8_lossy(status)
789        ))
790    })?;
791    Ok(PathBuf::from(git_text(path, "changed path")?))
792}
793
/// Returns `value` without its leading carriage-return and line-feed bytes.
fn trim_status_prefix(value: &[u8]) -> &[u8] {
    let skipped = value
        .iter()
        .take_while(|&&byte| byte == b'\r' || byte == b'\n')
        .count();
    &value[skipped..]
}
803
804fn change_kind(status: &[u8]) -> GitChangeKind {
805    match status.first().copied() {
806        Some(b'A') => GitChangeKind::Added,
807        Some(b'M') => GitChangeKind::Modified,
808        Some(b'D') => GitChangeKind::Deleted,
809        Some(b'R') => GitChangeKind::Renamed,
810        Some(b'C') => GitChangeKind::Copied,
811        Some(b'T') => GitChangeKind::TypeChanged,
812        _ => GitChangeKind::Unknown,
813    }
814}
815
816fn owner(name: String, email: String, role: &str) -> Result<Owner> {
817    let name = name.trim().to_string();
818    let email = email.trim().to_string();
819    let name = if name.is_empty() { email.clone() } else { name };
820    if name.is_empty() {
821        return Err(OkError::Repository(format!(
822            "git history {role} identity is empty"
823        )));
824    }
825    Ok(Owner {
826        name,
827        email: (!email.is_empty()).then_some(email),
828    })
829}
830
831fn git_timestamp(raw: &[u8], field: &str) -> Result<DateTime<Utc>> {
832    let value = git_text(raw, field)?;
833    DateTime::parse_from_rfc3339(&value)
834        .map(|timestamp| timestamp.with_timezone(&Utc))
835        .map_err(|err| {
836            OkError::Repository(format!("git history {field} `{value}` is invalid: {err}"))
837        })
838}
839
840fn git_text(raw: &[u8], field: &str) -> Result<String> {
841    String::from_utf8(raw.to_vec()).map_err(|err| {
842        OkError::Repository(format!("git history {field} is not valid UTF-8: {err}"))
843    })
844}
845
/// Heuristically decides whether `path` is a test file.
///
/// The comparison is ASCII case-insensitive. A path matches when it lies
/// inside a `test/` or `tests/` directory — including one at the very start
/// of a relative path such as `tests/api.rs` — or when its name ends in
/// `_test.rs`, `_test.go`, `.test.ts`, `.spec.ts`, `test.java` or
/// `tests.java`.
fn is_test_path(path: &Path) -> bool {
    let value = path.to_string_lossy().to_ascii_lowercase();
    // Repository-relative paths have no leading slash, so a top-level test
    // directory has to be matched as a prefix as well as `/dir/` inside the path.
    value.starts_with("test/")
        || value.starts_with("tests/")
        || value.contains("/test/")
        || value.contains("/tests/")
        || value.ends_with("_test.rs")
        || value.ends_with("_test.go")
        || value.ends_with(".test.ts")
        || value.ends_with(".spec.ts")
        || value.ends_with("test.java")
        || value.ends_with("tests.java")
}
857
858#[cfg(test)]
859mod tests {
860    use super::{
861        cochange_records, commit_history, commit_patches, parse_commit_patches,
862        parse_diff_name_status, parse_file_patches, parse_unified_zero_diff,
863    };
864    use open_kioku_core::GitChangeKind;
865    use std::fs;
866    use std::path::Path;
867    use std::process::Command;
868
869    #[test]
870    fn cochange_records_apply_recency_and_test_corun() {
871        let dir = tempfile::tempdir().unwrap();
872        run(dir.path(), &["init"]);
873        run(dir.path(), &["config", "user.email", "test@example.com"]);
874        run(dir.path(), &["config", "user.name", "Test User"]);
875
876        write(dir.path(), "src/old.rs", "fn old() {}\n");
877        write(
878            dir.path(),
879            "tests/old_test.rs",
880            "#[test] fn old_test() {}\n",
881        );
882        run(dir.path(), &["add", "."]);
883        run(dir.path(), &["commit", "-m", "old pair"]);
884
885        write(dir.path(), "src/new.rs", "fn new() {}\n");
886        write(
887            dir.path(),
888            "tests/new_test.rs",
889            "#[test] fn new_test() {}\n",
890        );
891        run(dir.path(), &["add", "."]);
892        run(dir.path(), &["commit", "-m", "new pair"]);
893
894        let records = cochange_records(dir.path(), 20, 10).unwrap();
895        let new_pair = records
896            .iter()
897            .find(|record| {
898                record.path == std::path::Path::new("src/new.rs")
899                    && record.cochanged_path == std::path::Path::new("tests/new_test.rs")
900            })
901            .unwrap();
902        let old_pair = records
903            .iter()
904            .find(|record| {
905                record.path == std::path::Path::new("src/old.rs")
906                    && record.cochanged_path == std::path::Path::new("tests/old_test.rs")
907            })
908            .unwrap();
909
910        assert!(new_pair.test_corun);
911        assert!(new_pair.recency_weight > old_pair.recency_weight);
912        assert_eq!(new_pair.commit_count, 1);
913    }
914
915    #[test]
916    fn commit_history_respects_window_and_keeps_every_file_touch() {
917        let dir = initialized_repo();
918        write(dir.path(), "src/old.rs", "fn old() {}\n");
919        commit_all(dir.path(), "old");
920        write(dir.path(), "src/a.rs", "fn a() {}\n");
921        write(dir.path(), "src/b.rs", "fn b() {}\n");
922        write(dir.path(), "tests/a_test.rs", "#[test] fn a() {}\n");
923        commit_all(dir.path(), "multi-file change");
924
925        let history = commit_history(dir.path(), 1).unwrap();
926
927        assert_eq!(history.commits.len(), 1);
928        assert_eq!(history.commits[0].summary, "multi-file change");
929        assert_eq!(history.commits[0].author.name, "Test User");
930        assert_eq!(
931            history.commits[0].author.email.as_deref(),
932            Some("test@example.com")
933        );
934        assert_eq!(history.commits[0].file_count, 3);
935        assert_eq!(history.file_touches.len(), 3);
936        assert!(history
937            .file_touches
938            .iter()
939            .all(|touch| touch.commit_id == history.commits[0].id));
940    }
941
942    #[test]
943    fn commit_history_captures_renames() {
944        let dir = initialized_repo();
945        write(dir.path(), "src/old.rs", "fn renamed() {}\n");
946        commit_all(dir.path(), "add old path");
947        run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
948        commit_all(dir.path(), "rename path");
949
950        let history = commit_history(dir.path(), 1).unwrap();
951        let touch = history.file_touches.first().unwrap();
952
953        assert_eq!(touch.change_kind, GitChangeKind::Renamed);
954        assert_eq!(
955            touch.previous_path.as_deref(),
956            Some(Path::new("src/old.rs"))
957        );
958        assert_eq!(touch.path, Path::new("src/new.rs"));
959    }
960
961    #[test]
962    fn commit_history_handles_empty_and_shallow_repositories() {
963        let empty = initialized_repo();
964        assert_eq!(
965            commit_history(empty.path(), 10).unwrap(),
966            super::CommitHistory::empty()
967        );
968
969        let origin = initialized_repo();
970        write(origin.path(), "src/one.rs", "fn one() {}\n");
971        commit_all(origin.path(), "one");
972        write(origin.path(), "src/two.rs", "fn two() {}\n");
973        commit_all(origin.path(), "two");
974
975        let clone_parent = tempfile::tempdir().unwrap();
976        let shallow = clone_parent.path().join("shallow");
977        let source = format!("file://{}", origin.path().canonicalize().unwrap().display());
978        let status = Command::new("git")
979            .args(["clone", "--quiet", "--depth", "1"])
980            .arg(source)
981            .arg(&shallow)
982            .status()
983            .unwrap();
984        assert!(status.success());
985
986        let history = commit_history(&shallow, 10).unwrap();
987        assert_eq!(history.commits.len(), 1);
988        assert_eq!(history.commits[0].summary, "two");
989    }
990
991    #[test]
992    fn commit_patches_capture_zero_context_line_ranges_and_renames() {
993        let dir = initialized_repo();
994        write(
995            dir.path(),
996            "src/old.rs",
997            "fn alpha() {\n    one();\n}\n\nfn beta() {\n    two();\n}\n",
998        );
999        commit_all(dir.path(), "add symbols");
1000        run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
1001        write(
1002            dir.path(),
1003            "src/new.rs",
1004            "fn alpha() {\n    changed();\n}\n\nfn beta() {\n    two();\n    added();\n}\n",
1005        );
1006        commit_all(dir.path(), "rename and modify");
1007
1008        let patches = commit_patches(dir.path(), 1).unwrap();
1009
1010        assert_eq!(patches.len(), 1);
1011        assert_eq!(patches[0].files.len(), 1);
1012        let file = &patches[0].files[0];
1013        assert_eq!(file.path, Path::new("src/new.rs"));
1014        assert_eq!(file.previous_path.as_deref(), Some(Path::new("src/old.rs")));
1015        assert_eq!(
1016            file.line_ranges,
1017            vec![
1018                open_kioku_core::LineRange { start: 2, end: 2 },
1019                open_kioku_core::LineRange { start: 7, end: 7 }
1020            ]
1021        );
1022    }
1023
1024    #[test]
1025    fn diff_name_status_parser_captures_added_modified_deleted_and_renamed() {
1026        let files = parse_diff_name_status(
1027            "A\tsrc/new.rs\n\
1028             M\tsrc/lib.rs\n\
1029             D\tsrc/old.rs\n\
1030             R087\tsrc/before.rs\tsrc/after.rs\n",
1031        )
1032        .unwrap();
1033
1034        assert_eq!(files.len(), 4);
1035        assert_eq!(files[0].status, GitChangeKind::Added);
1036        assert_eq!(files[0].new_path.as_deref(), Some(Path::new("src/new.rs")));
1037        assert_eq!(files[1].status, GitChangeKind::Modified);
1038        assert_eq!(files[2].status, GitChangeKind::Deleted);
1039        assert_eq!(files[2].old_path.as_deref(), Some(Path::new("src/old.rs")));
1040        assert_eq!(files[3].status, GitChangeKind::Renamed);
1041        assert_eq!(files[3].rename_score, Some(87));
1042        assert_eq!(
1043            files[3].old_path.as_deref(),
1044            Some(Path::new("src/before.rs"))
1045        );
1046        assert_eq!(
1047            files[3].new_path.as_deref(),
1048            Some(Path::new("src/after.rs"))
1049        );
1050    }
1051
1052    #[test]
1053    fn unified_zero_diff_parser_captures_old_new_hunks_and_changed_ranges() {
1054        let files = parse_unified_zero_diff(
1055            "diff --git a/src/old.rs b/src/new.rs\n\
1056             similarity index 92%\n\
1057             rename from src/old.rs\n\
1058             rename to src/new.rs\n\
1059             --- a/src/old.rs\n\
1060             +++ b/src/new.rs\n\
1061             @@ -2 +2 @@\n\
1062             -old();\n\
1063             +new();\n\
1064             @@ -8,0 +9,2 @@\n\
1065             +added();\n\
1066             +again();\n\
1067             diff --git a/src/deleted.rs b/src/deleted.rs\n\
1068             deleted file mode 100644\n\
1069             --- a/src/deleted.rs\n\
1070             +++ /dev/null\n\
1071             @@ -1,3 +0,0 @@\n",
1072        )
1073        .unwrap();
1074
1075        assert_eq!(files.len(), 2);
1076        assert_eq!(files[0].status, GitChangeKind::Renamed);
1077        assert_eq!(files[0].rename_score, Some(92));
1078        assert_eq!(files[0].old_path.as_deref(), Some(Path::new("src/old.rs")));
1079        assert_eq!(files[0].new_path.as_deref(), Some(Path::new("src/new.rs")));
1080        assert_eq!(
1081            files[0].hunks,
1082            vec![
1083                super::DiffHunk {
1084                    old_range: Some(open_kioku_core::LineRange { start: 2, end: 2 }),
1085                    new_range: Some(open_kioku_core::LineRange { start: 2, end: 2 }),
1086                },
1087                super::DiffHunk {
1088                    old_range: None,
1089                    new_range: Some(open_kioku_core::LineRange { start: 9, end: 10 }),
1090                }
1091            ]
1092        );
1093        assert_eq!(
1094            files[0].changed_line_ranges(),
1095            vec![
1096                open_kioku_core::LineRange { start: 2, end: 2 },
1097                open_kioku_core::LineRange { start: 9, end: 10 }
1098            ]
1099        );
1100        assert_eq!(files[1].status, GitChangeKind::Deleted);
1101        assert_eq!(
1102            files[1].hunks[0].old_range,
1103            Some(open_kioku_core::LineRange { start: 1, end: 3 })
1104        );
1105        assert_eq!(files[1].hunks[0].new_range, None);
1106    }
1107
1108    #[test]
1109    fn patch_parser_decodes_quoted_paths_and_ignores_deletion_ranges() {
1110        let patches = parse_file_patches(
1111            "diff --git \"a/src/space\\040name.rs\" \"b/src/space\\040name.rs\"\n\
1112             --- \"a/src/space\\040name.rs\"\n\
1113             +++ \"b/src/space\\040name.rs\"\n\
1114             @@ -3,2 +3,0 @@\n\
1115             @@ -8 +6,2 @@\n",
1116        )
1117        .unwrap();
1118
1119        assert_eq!(patches.len(), 1);
1120        assert_eq!(patches[0].path, Path::new("src/space name.rs"));
1121        assert_eq!(
1122            patches[0].line_ranges,
1123            vec![open_kioku_core::LineRange { start: 6, end: 7 }]
1124        );
1125    }
1126
1127    #[test]
1128    fn patch_parser_ignores_record_separator_bytes_inside_diff_content() {
1129        let mut raw = b"\x1e0123456789abcdef0123456789abcdef01234567\x00diff --git a/a.rs b/a.rs\n\
1130              +++ b/a.rs\n\
1131              @@ -0,0 +1 @@\n\
1132              +embedded "
1133            .to_vec();
1134        raw.push(0x1e);
1135        raw.extend_from_slice(b" byte\n");
1136
1137        let patches = parse_commit_patches(&raw).unwrap();
1138
1139        assert_eq!(patches.len(), 1);
1140        assert_eq!(patches[0].files.len(), 1);
1141        assert_eq!(patches[0].files[0].path, Path::new("a.rs"));
1142    }
1143
1144    fn initialized_repo() -> tempfile::TempDir {
1145        let dir = tempfile::tempdir().unwrap();
1146        run(dir.path(), &["init", "--quiet"]);
1147        run(dir.path(), &["config", "user.email", "test@example.com"]);
1148        run(dir.path(), &["config", "user.name", "Test User"]);
1149        run(dir.path(), &["config", "commit.gpgsign", "false"]);
1150        dir
1151    }
1152
1153    fn commit_all(root: &Path, message: &str) {
1154        run(root, &["add", "."]);
1155        run(root, &["commit", "--quiet", "-m", message]);
1156    }
1157
1158    fn write(root: &Path, path: &str, content: &str) {
1159        let path = root.join(path);
1160        fs::create_dir_all(path.parent().unwrap()).unwrap();
1161        fs::write(path, content).unwrap();
1162    }
1163
1164    fn run(root: &Path, args: &[&str]) {
1165        let status = Command::new("git")
1166            .arg("-C")
1167            .arg(root)
1168            .args(args)
1169            .status()
1170            .unwrap();
1171        assert!(status.success(), "git {args:?} failed");
1172    }
1173}