Skip to main content

open_kioku_git/
lib.rs

1use chrono::{DateTime, Utc};
2use open_kioku_core::{
3    GitChangeKind, GitCommitId, GitCommitRecord, GitFileTouch, HistoryRecordId, LineRange, Owner,
4};
5use open_kioku_errors::{OkError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::process::Command;
11
/// ASCII record separator (0x1E). The `git log` formats in this file emit it (`%x1e`) in front
/// of every commit so the raw output can be split into one record per commit.
const COMMIT_RECORD_SEPARATOR: u8 = 0x1e;
/// `git log` pretty format used for history scans: the record separator followed by ten
/// NUL-terminated fields — hash, parent hashes, author name, author email, author date,
/// committer name, committer email, committer date, subject and raw body.
const GIT_COMMIT_FORMAT: &str =
    "--format=%x1e%H%x00%P%x00%an%x00%ae%x00%aI%x00%cn%x00%ce%x00%cI%x00%s%x00%B%x00";
15
/// Commits parsed from `git log` together with the per-file changes they contain.
#[derive(Debug, Clone, PartialEq)]
pub struct CommitHistory {
    /// Commit metadata, in the order the commits appeared in the git output.
    pub commits: Vec<GitCommitRecord>,
    /// One entry per changed file per commit, appended commit by commit.
    pub file_touches: Vec<GitFileTouch>,
}
21
22impl CommitHistory {
23    pub fn empty() -> Self {
24        Self {
25            commits: Vec::new(),
26            file_touches: Vec::new(),
27        }
28    }
29}
30
/// Directed co-change relationship: how often `cochanged_path` was modified in the same commit
/// as `path`.
#[derive(Debug, Clone, PartialEq)]
pub struct CochangeRecord {
    /// File whose companions are being recorded.
    pub path: PathBuf,
    /// File that changed together with `path`.
    pub cochanged_path: PathBuf,
    /// Number of scanned commits that touched both files.
    pub commit_count: usize,
    /// Sum of per-commit weights `1 / (1 + index / 25)`, where `index` is the commit's position
    /// in the scanned history, so commits earlier in the list contribute more.
    pub recency_weight: f32,
    /// Whether `cochanged_path` looks like a test file according to the crate's path heuristic.
    pub test_corun: bool,
    /// Ids of the contributing commits, capped at the first five encountered.
    pub commits: Vec<String>,
}
40
/// Per-file patch summary of a single commit, as parsed from `git log --patch` output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitPatch {
    /// Id of the commit the patch belongs to.
    pub commit_id: GitCommitId,
    /// One entry per file section of the patch for which a path could be determined.
    pub files: Vec<FilePatch>,
}
46
/// Changed line ranges of one file inside a commit patch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Path of the file after the change (taken from the `+++` or `rename to` header).
    pub path: PathBuf,
    /// Path before a rename (taken from the `rename from` header), if any.
    pub previous_path: Option<PathBuf>,
    /// New-side line ranges of the hunks; hunks whose new side has zero lines are omitted.
    pub line_ranges: Vec<LineRange>,
}
53
/// One file entry of a `git diff`, parsed either from `--name-status` output or from a
/// zero-context unified diff.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffFile {
    /// Path on the old side of the diff; `None` when the parser found no old path (e.g. additions).
    pub old_path: Option<PathBuf>,
    /// Path on the new side of the diff; `None` when the parser found no new path (e.g. deletions).
    pub new_path: Option<PathBuf>,
    /// Kind of change reported by git, or inferred from which paths are present.
    pub status: GitChangeKind,
    /// Similarity percentage git reported for a rename or copy, when one was parsed.
    pub rename_score: Option<u8>,
    /// Hunks of the file; always empty for entries parsed from `--name-status` output.
    pub hunks: Vec<DiffHunk>,
}
62
63impl DiffFile {
64    pub fn changed_line_ranges(&self) -> Vec<LineRange> {
65        self.hunks
66            .iter()
67            .filter_map(|hunk| hunk.new_range.clone())
68            .collect()
69    }
70}
71
/// Line ranges covered by a single diff hunk, parsed from its `@@ -old +new @@` header.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Lines on the old side; `None` when the old-side count is zero.
    pub old_range: Option<LineRange>,
    /// Lines on the new side; `None` when the new-side count is zero.
    pub new_range: Option<LineRange>,
}
77
78pub fn discover_root(start: impl AsRef<Path>) -> Result<PathBuf> {
79    let mut current = start.as_ref().canonicalize()?;
80    loop {
81        if current.join(".git").exists() || current.join("ok.toml").exists() {
82            return Ok(current);
83        }
84        if !current.pop() {
85            return Ok(start.as_ref().canonicalize()?);
86        }
87    }
88}
89
/// Returns the checked-out branch name by reading `<root>/.git/HEAD`.
///
/// Yields `None` when the file cannot be read or when HEAD does not start with
/// `ref: refs/heads/` (for example a detached HEAD).
pub fn branch(root: impl AsRef<Path>) -> Option<String> {
    let head_path = root.as_ref().join(".git/HEAD");
    let contents = fs::read_to_string(head_path).ok()?;
    contents
        .strip_prefix("ref: refs/heads/")
        .map(|name| name.trim().to_string())
}
97
/// Returns the commit id HEAD currently points at, read directly from the `.git` directory.
///
/// * Detached HEAD: the trimmed contents of `.git/HEAD` are returned as-is.
/// * Symbolic HEAD (`ref: <name>`): the loose ref file `.git/<name>` is read first; when it is
///   missing, `.git/packed-refs` is searched, because git moves refs there when it packs them
///   (e.g. during `git gc`) and removes the loose files.
///
/// Returns `None` when HEAD cannot be read or the referenced ref exists in neither place
/// (for example on a branch without commits).
pub fn commit(root: impl AsRef<Path>) -> Option<String> {
    let git_dir = root.as_ref().join(".git");
    let head = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    if !head.starts_with("ref: ") {
        return Some(head.trim().to_string());
    }
    let reference = head.trim().strip_prefix("ref: ")?;
    if let Ok(value) = fs::read_to_string(git_dir.join(reference)) {
        return Some(value.trim().to_string());
    }
    // Packed refs are stored as `<id> <refname>` lines. Header comments (`# ...`) never match a
    // ref name and peeled-tag lines (`^<id>`) contain no space, so both are skipped naturally.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed.lines().find_map(|line| {
        let (id, name) = line.split_once(' ')?;
        (name.trim() == reference).then(|| id.to_string())
    })
}
108
109pub fn require_repo(root: impl AsRef<Path>) -> Result<PathBuf> {
110    let root = discover_root(root)?;
111    if !root.exists() {
112        return Err(OkError::Repository(format!(
113            "repository root does not exist: {}",
114            root.display()
115        )));
116    }
117    Ok(root)
118}
119
120pub fn cochange_records(
121    root: impl AsRef<Path>,
122    max_commits: usize,
123    max_files_per_commit: usize,
124) -> Result<Vec<CochangeRecord>> {
125    let history = commit_history(root, max_commits)?;
126    Ok(cochange_records_from_history(
127        &history,
128        max_files_per_commit,
129    ))
130}
131
132pub fn commit_history(root: impl AsRef<Path>, max_commits: usize) -> Result<CommitHistory> {
133    let root = root.as_ref();
134    if !root.join(".git").exists() || max_commits == 0 {
135        return Ok(CommitHistory::empty());
136    }
137    let head = Command::new("git")
138        .arg("-C")
139        .arg(root)
140        .args(["rev-parse", "--verify", "HEAD"])
141        .output()
142        .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
143    if !head.status.success() {
144        return Ok(CommitHistory::empty());
145    }
146    let output = Command::new("git")
147        .arg("-C")
148        .arg(root)
149        .arg("log")
150        .arg(format!("--max-count={max_commits}"))
151        .args([
152            "--no-show-signature",
153            "--no-color",
154            "--no-decorate",
155            "--encoding=UTF-8",
156            "--date=iso-strict",
157            "--find-renames",
158            GIT_COMMIT_FORMAT,
159            "--name-status",
160            "-z",
161        ])
162        .output()
163        .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
164    if !output.status.success() {
165        let stderr = String::from_utf8_lossy(&output.stderr);
166        return Err(OkError::Repository(format!(
167            "git history scan failed: {}",
168            stderr.trim()
169        )));
170    }
171    parse_commit_history(&output.stdout)
172}
173
174pub fn commit_patches(root: impl AsRef<Path>, max_commits: usize) -> Result<Vec<CommitPatch>> {
175    let root = root.as_ref();
176    if !root.join(".git").exists() || max_commits == 0 {
177        return Ok(Vec::new());
178    }
179    let head = Command::new("git")
180        .arg("-C")
181        .arg(root)
182        .args(["rev-parse", "--verify", "HEAD"])
183        .output()
184        .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
185    if !head.status.success() {
186        return Ok(Vec::new());
187    }
188    let output = Command::new("git")
189        .arg("-C")
190        .arg(root)
191        .args(["-c", "core.quotePath=true"])
192        .arg("log")
193        .arg(format!("--max-count={max_commits}"))
194        .args([
195            "--no-show-signature",
196            "--no-color",
197            "--no-decorate",
198            "--encoding=UTF-8",
199            "--find-renames",
200            "--format=%x1e%H%x00",
201            "--patch",
202            "--unified=0",
203            "--no-ext-diff",
204            "--no-textconv",
205        ])
206        .output()
207        .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
208    if !output.status.success() {
209        let stderr = String::from_utf8_lossy(&output.stderr);
210        return Err(OkError::Repository(format!(
211            "git patch scan failed: {}",
212            stderr.trim()
213        )));
214    }
215    parse_commit_patches(&output.stdout)
216}
217
/// Lists changed files by running `git diff --name-status --find-renames` with no revision
/// arguments.
///
/// Returns an empty list when `root` has no `.git` entry.
///
/// # Errors
///
/// Returns `OkError::Repository` when git cannot be run, exits unsuccessfully, or prints output
/// that cannot be parsed.
pub fn diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &[])
}
221
/// Lists changed files by running `git diff <since> --name-status --find-renames`.
///
/// `since` is handed to git unchanged as the first argument after `diff`. Returns an empty list
/// when `root` has no `.git` entry.
///
/// # Errors
///
/// Returns `OkError::Repository` when git cannot be run, exits unsuccessfully, or prints output
/// that cannot be parsed.
pub fn diff_name_status_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &[since])
}
225
/// Lists staged changes by running `git diff --cached --name-status --find-renames`.
///
/// Returns an empty list when `root` has no `.git` entry.
///
/// # Errors
///
/// Returns `OkError::Repository` when git cannot be run, exits unsuccessfully, or prints output
/// that cannot be parsed.
pub fn cached_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &["--cached"])
}
229
/// Lists changes relative to `HEAD` by running `git diff HEAD --name-status --find-renames`.
///
/// Returns an empty list when `root` has no `.git` entry.
///
/// # Errors
///
/// Returns `OkError::Repository` when git cannot be run, exits unsuccessfully, or prints output
/// that cannot be parsed.
pub fn head_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &["HEAD"])
}
233
/// Returns the changed files with their hunks by running `git diff --unified=0` with no
/// revision arguments.
///
/// Returns an empty list when `root` has no `.git` entry.
///
/// # Errors
///
/// Returns `OkError::Repository` when git cannot be run, exits unsuccessfully, or prints output
/// that cannot be parsed.
pub fn diff_unified_zero(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_unified_zero(root, &[])
}
237
/// Returns the changed files with their hunks by running `git diff <since> --unified=0`.
///
/// `since` is handed to git unchanged as the first argument after `diff`. Returns an empty list
/// when `root` has no `.git` entry.
///
/// # Errors
///
/// Returns `OkError::Repository` when git cannot be run, exits unsuccessfully, or prints output
/// that cannot be parsed.
pub fn diff_unified_zero_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
    run_diff_unified_zero(root, &[since])
}
241
242fn run_diff_unified_zero(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
243    let root = root.as_ref();
244    if !root.join(".git").exists() {
245        return Ok(Vec::new());
246    }
247    let output = Command::new("git")
248        .arg("-C")
249        .arg(root)
250        .args(["-c", "core.quotePath=true"])
251        .arg("diff")
252        .args(extra_args)
253        .args(["--unified=0", "--no-ext-diff", "--no-textconv"])
254        .output()
255        .map_err(|err| OkError::Repository(format!("git diff failed: {err}")))?;
256    if !output.status.success() {
257        let stderr = String::from_utf8_lossy(&output.stderr);
258        return Err(OkError::Repository(format!(
259            "git diff failed: {}",
260            stderr.trim()
261        )));
262    }
263    parse_unified_zero_diff(&git_text(&output.stdout, "diff output")?)
264}
265
266fn run_diff_name_status(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
267    let root = root.as_ref();
268    if !root.join(".git").exists() {
269        return Ok(Vec::new());
270    }
271    let output = Command::new("git")
272        .arg("-C")
273        .arg(root)
274        .arg("diff")
275        .args(extra_args)
276        .args(["--name-status", "--find-renames"])
277        .output()
278        .map_err(|err| OkError::Repository(format!("git diff --name-status failed: {err}")))?;
279    if !output.status.success() {
280        let stderr = String::from_utf8_lossy(&output.stderr);
281        return Err(OkError::Repository(format!(
282            "git diff --name-status failed: {}",
283            stderr.trim()
284        )));
285    }
286    parse_diff_name_status(&git_text(&output.stdout, "diff name-status output")?)
287}
288
289pub fn cochange_records_from_history(
290    history: &CommitHistory,
291    max_files_per_commit: usize,
292) -> Vec<CochangeRecord> {
293    if max_files_per_commit < 2 {
294        return Vec::new();
295    }
296    let mut files_by_commit = HashMap::<&str, Vec<PathBuf>>::new();
297    for touch in &history.file_touches {
298        files_by_commit
299            .entry(touch.commit_id.0.as_str())
300            .or_default()
301            .push(touch.path.clone());
302    }
303    let mut pairs: HashMap<(PathBuf, PathBuf), CochangeRecord> = HashMap::new();
304    for (idx, commit) in history.commits.iter().enumerate() {
305        let mut files = files_by_commit
306            .remove(commit.id.0.as_str())
307            .unwrap_or_default();
308        files.sort();
309        files.dedup();
310        if files.len() < 2 || files.len() > max_files_per_commit {
311            continue;
312        }
313        let recency_weight = 1.0 / (1.0 + idx as f32 / 25.0);
314        for left in &files {
315            for right in &files {
316                if left == right {
317                    continue;
318                }
319                let key = (left.clone(), right.clone());
320                let entry = pairs.entry(key).or_insert_with(|| CochangeRecord {
321                    path: left.clone(),
322                    cochanged_path: right.clone(),
323                    commit_count: 0,
324                    recency_weight: 0.0,
325                    test_corun: is_test_path(right),
326                    commits: Vec::new(),
327                });
328                entry.commit_count += 1;
329                entry.recency_weight += recency_weight;
330                entry.test_corun |= is_test_path(right);
331                if entry.commits.len() < 5 {
332                    entry.commits.push(commit.id.0.clone());
333                }
334            }
335        }
336    }
337    let mut records = pairs.into_values().collect::<Vec<_>>();
338    records.sort_by(|a, b| {
339        b.recency_weight
340            .partial_cmp(&a.recency_weight)
341            .unwrap_or(std::cmp::Ordering::Equal)
342            .then_with(|| b.commit_count.cmp(&a.commit_count))
343            .then_with(|| a.path.cmp(&b.path))
344            .then_with(|| a.cochanged_path.cmp(&b.cochanged_path))
345    });
346    records
347}
348
349fn parse_commit_history(raw: &[u8]) -> Result<CommitHistory> {
350    let mut history = CommitHistory::empty();
351    for record in raw
352        .split(|byte| *byte == COMMIT_RECORD_SEPARATOR)
353        .filter(|record| !record.is_empty())
354    {
355        let fields = record.splitn(11, |byte| *byte == 0).collect::<Vec<_>>();
356        if fields.len() != 11 {
357            return Err(OkError::Repository(format!(
358                "git history record has {} fields; expected commit metadata and file statuses",
359                fields.len()
360            )));
361        }
362        let sha = git_text(fields[0], "commit id")?;
363        let parent_ids = git_text(fields[1], "parent commit ids")?
364            .split_whitespace()
365            .map(|id| GitCommitId::new(id.to_string()))
366            .collect::<Vec<_>>();
367        let author = owner(
368            git_text(fields[2], "author name")?,
369            git_text(fields[3], "author email")?,
370            "author",
371        )?;
372        let authored_at = git_timestamp(fields[4], "authored timestamp")?;
373        let committer = owner(
374            git_text(fields[5], "committer name")?,
375            git_text(fields[6], "committer email")?,
376            "committer",
377        )?;
378        let committed_at = git_timestamp(fields[7], "committed timestamp")?;
379        let mut summary = git_text(fields[8], "commit summary")?;
380        let message = git_text(fields[9], "commit message")?
381            .trim_end_matches(['\r', '\n'])
382            .to_string();
383        if summary.trim().is_empty() {
384            summary = message.lines().next().unwrap_or_default().to_string();
385        }
386        let commit_id = GitCommitId::new(sha);
387        let mut touches = parse_file_touches(fields[10], &commit_id, committed_at)?;
388        let file_count = touches.len();
389        history.commits.push(GitCommitRecord {
390            id: commit_id,
391            parent_ids,
392            author,
393            committer: Some(committer),
394            authored_at,
395            committed_at,
396            summary,
397            message,
398            file_count,
399        });
400        history.file_touches.append(&mut touches);
401    }
402    Ok(history)
403}
404
405fn parse_commit_patches(raw: &[u8]) -> Result<Vec<CommitPatch>> {
406    let mut commits = Vec::new();
407    let starts = patch_record_starts(raw);
408    if starts.is_empty() && !raw.is_empty() {
409        return Err(OkError::Repository(
410            "git patch output is missing a commit record".into(),
411        ));
412    }
413    for (index, start) in starts.iter().enumerate() {
414        let end = starts.get(index + 1).copied().unwrap_or(raw.len());
415        let record = &raw[start + 1..end];
416        let Some(metadata_end) = record.iter().position(|byte| *byte == 0) else {
417            return Err(OkError::Repository(
418                "git patch record is missing its commit delimiter".into(),
419            ));
420        };
421        let commit_id = GitCommitId::new(git_text(&record[..metadata_end], "commit id")?);
422        let patch = git_text(&record[metadata_end + 1..], "patch")?;
423        commits.push(CommitPatch {
424            commit_id,
425            files: parse_file_patches(&patch)?,
426        });
427    }
428    Ok(commits)
429}
430
431fn patch_record_starts(raw: &[u8]) -> Vec<usize> {
432    raw.iter()
433        .enumerate()
434        .filter_map(|(index, byte)| {
435            if *byte != COMMIT_RECORD_SEPARATOR {
436                return None;
437            }
438            let commit_start = index + 1;
439            [40, 64].into_iter().find_map(|length| {
440                let commit_end = commit_start + length;
441                (raw.get(commit_end) == Some(&0)
442                    && raw
443                        .get(commit_start..commit_end)
444                        .is_some_and(|commit| commit.iter().all(u8::is_ascii_hexdigit)))
445                .then_some(index)
446            })
447        })
448        .collect()
449}
450
451fn parse_file_patches(patch: &str) -> Result<Vec<FilePatch>> {
452    #[derive(Default)]
453    struct PendingPatch {
454        path: Option<PathBuf>,
455        previous_path: Option<PathBuf>,
456        line_ranges: Vec<LineRange>,
457    }
458
459    fn finish(patches: &mut Vec<FilePatch>, pending: &mut PendingPatch) {
460        if let Some(path) = pending.path.take() {
461            patches.push(FilePatch {
462                path,
463                previous_path: pending.previous_path.take(),
464                line_ranges: std::mem::take(&mut pending.line_ranges),
465            });
466        } else {
467            pending.previous_path = None;
468            pending.line_ranges.clear();
469        }
470    }
471
472    let mut patches = Vec::new();
473    let mut pending = PendingPatch::default();
474    for line in patch.lines() {
475        if line.starts_with("diff --git ") {
476            finish(&mut patches, &mut pending);
477        } else if let Some(value) = line.strip_prefix("rename from ") {
478            pending.previous_path = Some(parse_patch_path(value, None)?);
479        } else if let Some(value) = line.strip_prefix("rename to ") {
480            pending.path = Some(parse_patch_path(value, None)?);
481        } else if let Some(value) = line.strip_prefix("+++ ") {
482            if value != "/dev/null" {
483                pending.path = Some(parse_patch_path(value, Some("b/"))?);
484            }
485        } else if line.starts_with("@@ ") {
486            if let Some(range) = parse_new_hunk_range(line)? {
487                pending.line_ranges.push(range);
488            }
489        }
490    }
491    finish(&mut patches, &mut pending);
492    Ok(patches)
493}
494
495fn parse_diff_name_status(raw: &str) -> Result<Vec<DiffFile>> {
496    raw.lines()
497        .filter(|line| !line.trim().is_empty())
498        .map(|line| {
499            let mut fields = line.split('\t').collect::<Vec<_>>();
500            if fields.len() < 2 {
501                fields = line.split_whitespace().collect();
502            }
503            let status = fields.first().copied().unwrap_or_default();
504            if fields.len() < 2 {
505                return Err(OkError::Repository(format!(
506                    "git diff name-status entry is missing a path: `{line}`"
507                )));
508            }
509            let kind = change_kind(status.as_bytes());
510            let rename_score = status
511                .strip_prefix('R')
512                .or_else(|| status.strip_prefix('C'))
513                .and_then(|score| score.parse::<u8>().ok());
514            match kind {
515                GitChangeKind::Renamed | GitChangeKind::Copied => {
516                    if fields.len() < 3 {
517                        return Err(OkError::Repository(format!(
518                            "git diff name-status rename is missing paths: `{line}`"
519                        )));
520                    }
521                    Ok(DiffFile {
522                        old_path: Some(parse_patch_path(fields[1], None)?),
523                        new_path: Some(parse_patch_path(fields[2], None)?),
524                        status: kind,
525                        rename_score,
526                        hunks: Vec::new(),
527                    })
528                }
529                GitChangeKind::Deleted => Ok(DiffFile {
530                    old_path: Some(parse_patch_path(fields[1], None)?),
531                    new_path: None,
532                    status: kind,
533                    rename_score,
534                    hunks: Vec::new(),
535                }),
536                _ => Ok(DiffFile {
537                    old_path: None,
538                    new_path: Some(parse_patch_path(fields[1], None)?),
539                    status: kind,
540                    rename_score,
541                    hunks: Vec::new(),
542                }),
543            }
544        })
545        .collect()
546}
547
548fn parse_unified_zero_diff(patch: &str) -> Result<Vec<DiffFile>> {
549    #[derive(Default)]
550    struct PendingDiff {
551        old_path: Option<PathBuf>,
552        new_path: Option<PathBuf>,
553        status: Option<GitChangeKind>,
554        rename_score: Option<u8>,
555        hunks: Vec<DiffHunk>,
556    }
557
558    fn finish(files: &mut Vec<DiffFile>, pending: &mut PendingDiff) {
559        if pending.old_path.is_none() && pending.new_path.is_none() {
560            pending.hunks.clear();
561            pending.status = None;
562            pending.rename_score = None;
563            return;
564        }
565        let status = pending.status.unwrap_or_else(|| {
566            if pending.old_path.is_none() {
567                GitChangeKind::Added
568            } else if pending.new_path.is_none() {
569                GitChangeKind::Deleted
570            } else if pending.old_path != pending.new_path {
571                GitChangeKind::Renamed
572            } else {
573                GitChangeKind::Modified
574            }
575        });
576        files.push(DiffFile {
577            old_path: pending.old_path.take(),
578            new_path: pending.new_path.take(),
579            status,
580            rename_score: pending.rename_score.take(),
581            hunks: std::mem::take(&mut pending.hunks),
582        });
583    }
584
585    let mut files = Vec::new();
586    let mut pending = PendingDiff::default();
587    for line in patch.lines() {
588        if line.starts_with("diff --git ") {
589            finish(&mut files, &mut pending);
590        } else if line.starts_with("new file mode ") {
591            pending.status = Some(GitChangeKind::Added);
592        } else if line.starts_with("deleted file mode ") {
593            pending.status = Some(GitChangeKind::Deleted);
594        } else if let Some(score) = line.strip_prefix("similarity index ") {
595            pending.rename_score = score.trim_end_matches('%').parse::<u8>().ok();
596        } else if let Some(value) = line.strip_prefix("rename from ") {
597            pending.old_path = Some(parse_patch_path(value, None)?);
598            pending.status = Some(GitChangeKind::Renamed);
599        } else if let Some(value) = line.strip_prefix("rename to ") {
600            pending.new_path = Some(parse_patch_path(value, None)?);
601            pending.status = Some(GitChangeKind::Renamed);
602        } else if let Some(value) = line.strip_prefix("--- ") {
603            if value != "/dev/null" {
604                pending.old_path = Some(parse_patch_path(value, Some("a/"))?);
605            }
606        } else if let Some(value) = line.strip_prefix("+++ ") {
607            if value != "/dev/null" {
608                pending.new_path = Some(parse_patch_path(value, Some("b/"))?);
609            }
610        } else if line.starts_with("@@ ") {
611            pending.hunks.push(parse_diff_hunk(line)?);
612        }
613    }
614    finish(&mut files, &mut pending);
615    Ok(files)
616}
617
618fn parse_diff_hunk(header: &str) -> Result<DiffHunk> {
619    let old = header
620        .split_whitespace()
621        .find(|part| part.starts_with('-'))
622        .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
623    let new = header
624        .split_whitespace()
625        .find(|part| part.starts_with('+'))
626        .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
627    Ok(DiffHunk {
628        old_range: parse_hunk_range(old.trim_start_matches('-'))?,
629        new_range: parse_hunk_range(new.trim_start_matches('+'))?,
630    })
631}
632
633fn parse_hunk_range(value: &str) -> Result<Option<LineRange>> {
634    let (start, count) = value.split_once(',').unwrap_or((value, "1"));
635    let start = start.parse::<u32>().map_err(|err| {
636        OkError::Repository(format!("git diff hunk start `{start}` is invalid: {err}"))
637    })?;
638    let count = count.parse::<u32>().map_err(|err| {
639        OkError::Repository(format!("git diff hunk count `{count}` is invalid: {err}"))
640    })?;
641    if count == 0 {
642        return Ok(None);
643    }
644    Ok(Some(LineRange {
645        start,
646        end: start.saturating_add(count - 1),
647    }))
648}
649
650fn parse_new_hunk_range(header: &str) -> Result<Option<LineRange>> {
651    let marker = header
652        .split_whitespace()
653        .find(|part| part.starts_with('+'))
654        .ok_or_else(|| OkError::Repository(format!("git patch hunk is malformed: `{header}`")))?;
655    let value = marker.trim_start_matches('+');
656    let (start, count) = value.split_once(',').unwrap_or((value, "1"));
657    let start = start.parse::<u32>().map_err(|err| {
658        OkError::Repository(format!("git patch hunk start `{start}` is invalid: {err}"))
659    })?;
660    let count = count.parse::<u32>().map_err(|err| {
661        OkError::Repository(format!("git patch hunk count `{count}` is invalid: {err}"))
662    })?;
663    if count == 0 {
664        return Ok(None);
665    }
666    Ok(Some(LineRange {
667        start,
668        end: start.saturating_add(count - 1),
669    }))
670}
671
672fn parse_patch_path(value: &str, prefix: Option<&str>) -> Result<PathBuf> {
673    let decoded = if value.starts_with('"') {
674        decode_git_quoted_path(value)?
675    } else {
676        value.to_string()
677    };
678    let decoded = prefix
679        .and_then(|prefix| decoded.strip_prefix(prefix))
680        .unwrap_or(&decoded);
681    Ok(PathBuf::from(decoded))
682}
683
684fn decode_git_quoted_path(value: &str) -> Result<String> {
685    let Some(inner) = value
686        .strip_prefix('"')
687        .and_then(|value| value.strip_suffix('"'))
688    else {
689        return Err(OkError::Repository(format!(
690            "git patch path has invalid quoting: `{value}`"
691        )));
692    };
693    let mut bytes = Vec::with_capacity(inner.len());
694    let mut chars = inner.as_bytes().iter().copied().peekable();
695    while let Some(byte) = chars.next() {
696        if byte != b'\\' {
697            bytes.push(byte);
698            continue;
699        }
700        let escaped = chars.next().ok_or_else(|| {
701            OkError::Repository(format!("git patch path has a trailing escape: `{value}`"))
702        })?;
703        match escaped {
704            b'\\' | b'"' => bytes.push(escaped),
705            b'a' => bytes.push(0x07),
706            b'b' => bytes.push(0x08),
707            b't' => bytes.push(b'\t'),
708            b'n' => bytes.push(b'\n'),
709            b'v' => bytes.push(0x0b),
710            b'f' => bytes.push(0x0c),
711            b'r' => bytes.push(b'\r'),
712            b'0'..=b'7' => {
713                let mut octal = vec![escaped];
714                for _ in 0..2 {
715                    if chars.peek().is_some_and(|byte| matches!(byte, b'0'..=b'7')) {
716                        octal.push(chars.next().expect("peeked octal byte"));
717                    } else {
718                        break;
719                    }
720                }
721                let decoded = std::str::from_utf8(&octal)
722                    .ok()
723                    .and_then(|value| u8::from_str_radix(value, 8).ok())
724                    .ok_or_else(|| {
725                        OkError::Repository("git patch path contains invalid octal escape".into())
726                    })?;
727                bytes.push(decoded);
728            }
729            other => bytes.push(other),
730        }
731    }
732    String::from_utf8(bytes)
733        .map_err(|err| OkError::Repository(format!("git patch path is not UTF-8: {err}")))
734}
735
736fn parse_file_touches(
737    raw: &[u8],
738    commit_id: &GitCommitId,
739    touched_at: DateTime<Utc>,
740) -> Result<Vec<GitFileTouch>> {
741    let mut tokens = raw.split(|byte| *byte == 0);
742    let mut touches = Vec::new();
743    while let Some(status) = next_status(&mut tokens) {
744        let change_kind = change_kind(status);
745        let rename_or_copy = matches!(change_kind, GitChangeKind::Renamed | GitChangeKind::Copied);
746        let first_path = next_path(&mut tokens, commit_id, status)?;
747        let (path, previous_path) = if rename_or_copy {
748            let current_path = next_path(&mut tokens, commit_id, status)?;
749            (current_path, Some(first_path))
750        } else {
751            (first_path, None)
752        };
753        let id = HistoryRecordId::new(format!("file-touch:{}:{}", commit_id.0, touches.len()));
754        touches.push(GitFileTouch {
755            id,
756            commit_id: commit_id.clone(),
757            path,
758            previous_path,
759            change_kind,
760            additions: None,
761            deletions: None,
762            touched_at,
763        });
764    }
765    Ok(touches)
766}
767
768fn next_status<'a>(tokens: &mut impl Iterator<Item = &'a [u8]>) -> Option<&'a [u8]> {
769    tokens
770        .map(trim_status_prefix)
771        .find(|token| !token.is_empty())
772}
773
774fn next_path<'a>(
775    tokens: &mut impl Iterator<Item = &'a [u8]>,
776    commit_id: &GitCommitId,
777    status: &[u8],
778) -> Result<PathBuf> {
779    let path = tokens.find(|token| !token.is_empty()).ok_or_else(|| {
780        OkError::Repository(format!(
781            "git history record for commit `{commit_id}` is missing a path after status `{}`",
782            String::from_utf8_lossy(status)
783        ))
784    })?;
785    Ok(PathBuf::from(git_text(path, "changed path")?))
786}
787
/// Returns `value` without its leading carriage-return and line-feed bytes.
fn trim_status_prefix(value: &[u8]) -> &[u8] {
    let line_breaks = value
        .iter()
        .take_while(|&&byte| byte == b'\r' || byte == b'\n')
        .count();
    &value[line_breaks..]
}
797
798fn change_kind(status: &[u8]) -> GitChangeKind {
799    match status.first().copied() {
800        Some(b'A') => GitChangeKind::Added,
801        Some(b'M') => GitChangeKind::Modified,
802        Some(b'D') => GitChangeKind::Deleted,
803        Some(b'R') => GitChangeKind::Renamed,
804        Some(b'C') => GitChangeKind::Copied,
805        Some(b'T') => GitChangeKind::TypeChanged,
806        _ => GitChangeKind::Unknown,
807    }
808}
809
810fn owner(name: String, email: String, role: &str) -> Result<Owner> {
811    let name = name.trim().to_string();
812    let email = email.trim().to_string();
813    let name = if name.is_empty() { email.clone() } else { name };
814    if name.is_empty() {
815        return Err(OkError::Repository(format!(
816            "git history {role} identity is empty"
817        )));
818    }
819    Ok(Owner {
820        name,
821        email: (!email.is_empty()).then_some(email),
822    })
823}
824
825fn git_timestamp(raw: &[u8], field: &str) -> Result<DateTime<Utc>> {
826    let value = git_text(raw, field)?;
827    DateTime::parse_from_rfc3339(&value)
828        .map(|timestamp| timestamp.with_timezone(&Utc))
829        .map_err(|err| {
830            OkError::Repository(format!("git history {field} `{value}` is invalid: {err}"))
831        })
832}
833
834fn git_text(raw: &[u8], field: &str) -> Result<String> {
835    String::from_utf8(raw.to_vec()).map_err(|err| {
836        OkError::Repository(format!("git history {field} is not valid UTF-8: {err}"))
837    })
838}
839
/// Heuristically decides whether `path` refers to a test file (case-insensitively).
///
/// A path counts as a test when any of its directory components is `test` or `tests` — this
/// includes a leading component, since git reports repository-relative paths such as
/// `tests/cli.rs` — or when the file name ends in one of the known test suffixes
/// (`_test.rs`, `_test.go`, `.test.ts`, `.spec.ts`, `test.java`, `tests.java`).
fn is_test_path(path: &Path) -> bool {
    const TEST_SUFFIXES: [&str; 6] = [
        "_test.rs",
        "_test.go",
        ".test.ts",
        ".spec.ts",
        "test.java",
        "tests.java",
    ];
    let value = path.to_string_lossy().to_ascii_lowercase();
    // Only the components in front of the last `/` are directories; the final one is the file.
    let in_test_dir = value.rsplit_once('/').is_some_and(|(dirs, _file)| {
        dirs.split('/').any(|dir| dir == "test" || dir == "tests")
    });
    in_test_dir || TEST_SUFFIXES.iter().any(|suffix| value.ends_with(*suffix))
}
851
852#[cfg(test)]
853mod tests {
854    use super::{
855        cochange_records, commit_history, commit_patches, parse_commit_patches,
856        parse_diff_name_status, parse_file_patches, parse_unified_zero_diff,
857    };
858    use open_kioku_core::GitChangeKind;
859    use std::fs;
860    use std::path::Path;
861    use std::process::Command;
862
863    #[test]
864    fn cochange_records_apply_recency_and_test_corun() {
865        let dir = tempfile::tempdir().unwrap();
866        run(dir.path(), &["init"]);
867        run(dir.path(), &["config", "user.email", "test@example.com"]);
868        run(dir.path(), &["config", "user.name", "Test User"]);
869
870        write(dir.path(), "src/old.rs", "fn old() {}\n");
871        write(
872            dir.path(),
873            "tests/old_test.rs",
874            "#[test] fn old_test() {}\n",
875        );
876        run(dir.path(), &["add", "."]);
877        run(dir.path(), &["commit", "-m", "old pair"]);
878
879        write(dir.path(), "src/new.rs", "fn new() {}\n");
880        write(
881            dir.path(),
882            "tests/new_test.rs",
883            "#[test] fn new_test() {}\n",
884        );
885        run(dir.path(), &["add", "."]);
886        run(dir.path(), &["commit", "-m", "new pair"]);
887
888        let records = cochange_records(dir.path(), 20, 10).unwrap();
889        let new_pair = records
890            .iter()
891            .find(|record| {
892                record.path == std::path::Path::new("src/new.rs")
893                    && record.cochanged_path == std::path::Path::new("tests/new_test.rs")
894            })
895            .unwrap();
896        let old_pair = records
897            .iter()
898            .find(|record| {
899                record.path == std::path::Path::new("src/old.rs")
900                    && record.cochanged_path == std::path::Path::new("tests/old_test.rs")
901            })
902            .unwrap();
903
904        assert!(new_pair.test_corun);
905        assert!(new_pair.recency_weight > old_pair.recency_weight);
906        assert_eq!(new_pair.commit_count, 1);
907    }
908
909    #[test]
910    fn commit_history_respects_window_and_keeps_every_file_touch() {
911        let dir = initialized_repo();
912        write(dir.path(), "src/old.rs", "fn old() {}\n");
913        commit_all(dir.path(), "old");
914        write(dir.path(), "src/a.rs", "fn a() {}\n");
915        write(dir.path(), "src/b.rs", "fn b() {}\n");
916        write(dir.path(), "tests/a_test.rs", "#[test] fn a() {}\n");
917        commit_all(dir.path(), "multi-file change");
918
919        let history = commit_history(dir.path(), 1).unwrap();
920
921        assert_eq!(history.commits.len(), 1);
922        assert_eq!(history.commits[0].summary, "multi-file change");
923        assert_eq!(history.commits[0].author.name, "Test User");
924        assert_eq!(
925            history.commits[0].author.email.as_deref(),
926            Some("test@example.com")
927        );
928        assert_eq!(history.commits[0].file_count, 3);
929        assert_eq!(history.file_touches.len(), 3);
930        assert!(history
931            .file_touches
932            .iter()
933            .all(|touch| touch.commit_id == history.commits[0].id));
934    }
935
936    #[test]
937    fn commit_history_captures_renames() {
938        let dir = initialized_repo();
939        write(dir.path(), "src/old.rs", "fn renamed() {}\n");
940        commit_all(dir.path(), "add old path");
941        run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
942        commit_all(dir.path(), "rename path");
943
944        let history = commit_history(dir.path(), 1).unwrap();
945        let touch = history.file_touches.first().unwrap();
946
947        assert_eq!(touch.change_kind, GitChangeKind::Renamed);
948        assert_eq!(
949            touch.previous_path.as_deref(),
950            Some(Path::new("src/old.rs"))
951        );
952        assert_eq!(touch.path, Path::new("src/new.rs"));
953    }
954
955    #[test]
956    fn commit_history_handles_empty_and_shallow_repositories() {
957        let empty = initialized_repo();
958        assert_eq!(
959            commit_history(empty.path(), 10).unwrap(),
960            super::CommitHistory::empty()
961        );
962
963        let origin = initialized_repo();
964        write(origin.path(), "src/one.rs", "fn one() {}\n");
965        commit_all(origin.path(), "one");
966        write(origin.path(), "src/two.rs", "fn two() {}\n");
967        commit_all(origin.path(), "two");
968
969        let clone_parent = tempfile::tempdir().unwrap();
970        let shallow = clone_parent.path().join("shallow");
971        let source = format!("file://{}", origin.path().canonicalize().unwrap().display());
972        let status = Command::new("git")
973            .args(["clone", "--quiet", "--depth", "1"])
974            .arg(source)
975            .arg(&shallow)
976            .status()
977            .unwrap();
978        assert!(status.success());
979
980        let history = commit_history(&shallow, 10).unwrap();
981        assert_eq!(history.commits.len(), 1);
982        assert_eq!(history.commits[0].summary, "two");
983    }
984
985    #[test]
986    fn commit_patches_capture_zero_context_line_ranges_and_renames() {
987        let dir = initialized_repo();
988        write(
989            dir.path(),
990            "src/old.rs",
991            "fn alpha() {\n    one();\n}\n\nfn beta() {\n    two();\n}\n",
992        );
993        commit_all(dir.path(), "add symbols");
994        run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
995        write(
996            dir.path(),
997            "src/new.rs",
998            "fn alpha() {\n    changed();\n}\n\nfn beta() {\n    two();\n    added();\n}\n",
999        );
1000        commit_all(dir.path(), "rename and modify");
1001
1002        let patches = commit_patches(dir.path(), 1).unwrap();
1003
1004        assert_eq!(patches.len(), 1);
1005        assert_eq!(patches[0].files.len(), 1);
1006        let file = &patches[0].files[0];
1007        assert_eq!(file.path, Path::new("src/new.rs"));
1008        assert_eq!(file.previous_path.as_deref(), Some(Path::new("src/old.rs")));
1009        assert_eq!(
1010            file.line_ranges,
1011            vec![
1012                open_kioku_core::LineRange { start: 2, end: 2 },
1013                open_kioku_core::LineRange { start: 7, end: 7 }
1014            ]
1015        );
1016    }
1017
1018    #[test]
1019    fn diff_name_status_parser_captures_added_modified_deleted_and_renamed() {
1020        let files = parse_diff_name_status(
1021            "A\tsrc/new.rs\n\
1022             M\tsrc/lib.rs\n\
1023             D\tsrc/old.rs\n\
1024             R087\tsrc/before.rs\tsrc/after.rs\n",
1025        )
1026        .unwrap();
1027
1028        assert_eq!(files.len(), 4);
1029        assert_eq!(files[0].status, GitChangeKind::Added);
1030        assert_eq!(files[0].new_path.as_deref(), Some(Path::new("src/new.rs")));
1031        assert_eq!(files[1].status, GitChangeKind::Modified);
1032        assert_eq!(files[2].status, GitChangeKind::Deleted);
1033        assert_eq!(files[2].old_path.as_deref(), Some(Path::new("src/old.rs")));
1034        assert_eq!(files[3].status, GitChangeKind::Renamed);
1035        assert_eq!(files[3].rename_score, Some(87));
1036        assert_eq!(
1037            files[3].old_path.as_deref(),
1038            Some(Path::new("src/before.rs"))
1039        );
1040        assert_eq!(
1041            files[3].new_path.as_deref(),
1042            Some(Path::new("src/after.rs"))
1043        );
1044    }
1045
1046    #[test]
1047    fn unified_zero_diff_parser_captures_old_new_hunks_and_changed_ranges() {
1048        let files = parse_unified_zero_diff(
1049            "diff --git a/src/old.rs b/src/new.rs\n\
1050             similarity index 92%\n\
1051             rename from src/old.rs\n\
1052             rename to src/new.rs\n\
1053             --- a/src/old.rs\n\
1054             +++ b/src/new.rs\n\
1055             @@ -2 +2 @@\n\
1056             -old();\n\
1057             +new();\n\
1058             @@ -8,0 +9,2 @@\n\
1059             +added();\n\
1060             +again();\n\
1061             diff --git a/src/deleted.rs b/src/deleted.rs\n\
1062             deleted file mode 100644\n\
1063             --- a/src/deleted.rs\n\
1064             +++ /dev/null\n\
1065             @@ -1,3 +0,0 @@\n",
1066        )
1067        .unwrap();
1068
1069        assert_eq!(files.len(), 2);
1070        assert_eq!(files[0].status, GitChangeKind::Renamed);
1071        assert_eq!(files[0].rename_score, Some(92));
1072        assert_eq!(files[0].old_path.as_deref(), Some(Path::new("src/old.rs")));
1073        assert_eq!(files[0].new_path.as_deref(), Some(Path::new("src/new.rs")));
1074        assert_eq!(
1075            files[0].hunks,
1076            vec![
1077                super::DiffHunk {
1078                    old_range: Some(open_kioku_core::LineRange { start: 2, end: 2 }),
1079                    new_range: Some(open_kioku_core::LineRange { start: 2, end: 2 }),
1080                },
1081                super::DiffHunk {
1082                    old_range: None,
1083                    new_range: Some(open_kioku_core::LineRange { start: 9, end: 10 }),
1084                }
1085            ]
1086        );
1087        assert_eq!(
1088            files[0].changed_line_ranges(),
1089            vec![
1090                open_kioku_core::LineRange { start: 2, end: 2 },
1091                open_kioku_core::LineRange { start: 9, end: 10 }
1092            ]
1093        );
1094        assert_eq!(files[1].status, GitChangeKind::Deleted);
1095        assert_eq!(
1096            files[1].hunks[0].old_range,
1097            Some(open_kioku_core::LineRange { start: 1, end: 3 })
1098        );
1099        assert_eq!(files[1].hunks[0].new_range, None);
1100    }
1101
1102    #[test]
1103    fn patch_parser_decodes_quoted_paths_and_ignores_deletion_ranges() {
1104        let patches = parse_file_patches(
1105            "diff --git \"a/src/space\\040name.rs\" \"b/src/space\\040name.rs\"\n\
1106             --- \"a/src/space\\040name.rs\"\n\
1107             +++ \"b/src/space\\040name.rs\"\n\
1108             @@ -3,2 +3,0 @@\n\
1109             @@ -8 +6,2 @@\n",
1110        )
1111        .unwrap();
1112
1113        assert_eq!(patches.len(), 1);
1114        assert_eq!(patches[0].path, Path::new("src/space name.rs"));
1115        assert_eq!(
1116            patches[0].line_ranges,
1117            vec![open_kioku_core::LineRange { start: 6, end: 7 }]
1118        );
1119    }
1120
1121    #[test]
1122    fn patch_parser_ignores_record_separator_bytes_inside_diff_content() {
1123        let mut raw = b"\x1e0123456789abcdef0123456789abcdef01234567\x00diff --git a/a.rs b/a.rs\n\
1124              +++ b/a.rs\n\
1125              @@ -0,0 +1 @@\n\
1126              +embedded "
1127            .to_vec();
1128        raw.push(0x1e);
1129        raw.extend_from_slice(b" byte\n");
1130
1131        let patches = parse_commit_patches(&raw).unwrap();
1132
1133        assert_eq!(patches.len(), 1);
1134        assert_eq!(patches[0].files.len(), 1);
1135        assert_eq!(patches[0].files[0].path, Path::new("a.rs"));
1136    }
1137
1138    fn initialized_repo() -> tempfile::TempDir {
1139        let dir = tempfile::tempdir().unwrap();
1140        run(dir.path(), &["init", "--quiet"]);
1141        run(dir.path(), &["config", "user.email", "test@example.com"]);
1142        run(dir.path(), &["config", "user.name", "Test User"]);
1143        run(dir.path(), &["config", "commit.gpgsign", "false"]);
1144        dir
1145    }
1146
1147    fn commit_all(root: &Path, message: &str) {
1148        run(root, &["add", "."]);
1149        run(root, &["commit", "--quiet", "-m", message]);
1150    }
1151
1152    fn write(root: &Path, path: &str, content: &str) {
1153        let path = root.join(path);
1154        fs::create_dir_all(path.parent().unwrap()).unwrap();
1155        fs::write(path, content).unwrap();
1156    }
1157
1158    fn run(root: &Path, args: &[&str]) {
1159        let status = Command::new("git")
1160            .arg("-C")
1161            .arg(root)
1162            .args(args)
1163            .status()
1164            .unwrap();
1165        assert!(status.success(), "git {args:?} failed");
1166    }
1167}