1mod ownership;
2mod reviewers;
3
4use chrono::{DateTime, Utc};
5use open_kioku_core::{
6 GitChangeKind, GitCommitId, GitCommitRecord, GitFileTouch, HistoryRecordId, LineRange, Owner,
7};
8use open_kioku_errors::{OkError, Result};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs;
12use std::path::{Path, PathBuf};
13use std::process::Command;
14
/// ASCII "record separator" control byte (`0x1e`) that the custom `git log`
/// formats emit in front of every commit record.
const COMMIT_RECORD_SEPARATOR: u8 = b'\x1e';

/// `git log` pretty-format used by the history scan.
///
/// Each record starts with the record separator and is followed by ten
/// NUL-terminated fields; whatever git prints after the last NUL (the
/// name-status list) becomes the eleventh field when the record is split.
const GIT_COMMIT_FORMAT: &str = concat!(
    "--format=",
    "%x1e",    // record separator
    "%H%x00",  // commit hash
    "%P%x00",  // parent hashes, space separated
    "%an%x00", // author name
    "%ae%x00", // author email
    "%aI%x00", // author date, strict ISO 8601
    "%cn%x00", // committer name
    "%ce%x00", // committer email
    "%cI%x00", // committer date, strict ISO 8601
    "%s%x00",  // subject line
    "%B%x00",  // raw body
);
18
19pub use ownership::{ownership_for_path, OwnershipInput};
20pub use reviewers::{suggest_reviewers, ReviewerSuggestionInput};
21
/// Commit metadata and per-file changes gathered from one history scan.
#[derive(Debug, Clone, PartialEq)]
pub struct CommitHistory {
    /// One record per scanned commit.
    pub commits: Vec<GitCommitRecord>,
    /// One entry per changed path per commit; each entry carries the id of
    /// the commit it belongs to.
    pub file_touches: Vec<GitFileTouch>,
}
27
28impl CommitHistory {
29 pub fn empty() -> Self {
30 Self {
31 commits: Vec::new(),
32 file_touches: Vec::new(),
33 }
34 }
35}
36
/// Directed co-change statistic: how often `cochanged_path` changed in the
/// same commit as `path`.
#[derive(Debug, Clone, PartialEq)]
pub struct CochangeRecord {
    /// The file the statistic is keyed on.
    pub path: PathBuf,
    /// A different file that changed in the same commit(s) as `path`.
    pub cochanged_path: PathBuf,
    /// Number of commits in which both files changed.
    pub commit_count: usize,
    /// Sum of per-commit recency weights; commits earlier in the scanned
    /// history contribute more.
    pub recency_weight: f32,
    /// Whether `cochanged_path` looks like a test file.
    pub test_corun: bool,
    /// Ids of contributing commits (the producer in this file keeps at most
    /// five).
    pub commits: Vec<String>,
}
46
/// The per-file patch summary of a single commit.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitPatch {
    /// Id of the commit the patch belongs to.
    pub commit_id: GitCommitId,
    /// Files touched by the commit, with their changed line ranges.
    pub files: Vec<FilePatch>,
}
52
/// Changed line ranges of one file inside a commit patch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Path of the file on the new side of the patch.
    pub path: PathBuf,
    /// Path the file was renamed from, when the patch carries a
    /// `rename from` line.
    pub previous_path: Option<PathBuf>,
    /// New-side line ranges of the hunks; hunks that add no lines contribute
    /// no range.
    pub line_ranges: Vec<LineRange>,
}
59
/// One file entry of a `git diff`, as produced by the name-status or the
/// zero-context patch parser.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffFile {
    /// Path on the old side of the diff, when the parser recorded one.
    pub old_path: Option<PathBuf>,
    /// Path on the new side of the diff; `None` for deleted files.
    pub new_path: Option<PathBuf>,
    /// Kind of change reported for (or inferred from) the entry.
    pub status: GitChangeKind,
    /// Similarity score parsed from the diff for renames/copies, if any.
    pub rename_score: Option<u8>,
    /// Hunks of the file; empty when the entry comes from name-status output.
    pub hunks: Vec<DiffHunk>,
}
68
69impl DiffFile {
70 pub fn changed_line_ranges(&self) -> Vec<LineRange> {
71 self.hunks
72 .iter()
73 .filter_map(|hunk| hunk.new_range.clone())
74 .collect()
75 }
76}
77
/// Line ranges covered by one diff hunk.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Lines removed on the old side; `None` when the hunk's old count is 0.
    pub old_range: Option<LineRange>,
    /// Lines added on the new side; `None` when the hunk's new count is 0.
    pub new_range: Option<LineRange>,
}
83
84pub fn discover_root(start: impl AsRef<Path>) -> Result<PathBuf> {
85 let mut current = start.as_ref().canonicalize()?;
86 loop {
87 if current.join(".git").exists() || current.join("ok.toml").exists() {
88 return Ok(current);
89 }
90 if !current.pop() {
91 return Ok(start.as_ref().canonicalize()?);
92 }
93 }
94}
95
/// Returns the name of the checked-out branch, read from `<root>/.git/HEAD`.
///
/// Yields `None` when the file cannot be read or when `HEAD` is not a
/// symbolic reference into `refs/heads/` (e.g. a detached `HEAD`).
pub fn branch(root: impl AsRef<Path>) -> Option<String> {
    let head_file = root.as_ref().join(".git/HEAD");
    let contents = fs::read_to_string(head_file).ok()?;
    contents
        .strip_prefix("ref: refs/heads/")
        .map(|name| name.trim().to_string())
}
103
/// Returns the commit id `HEAD` currently points at, read straight from the
/// files under `<root>/.git`.
///
/// * Detached `HEAD`: the trimmed content of `HEAD` is returned as is.
/// * Symbolic `HEAD` (`ref: <name>`): the loose ref file `.git/<name>` is
///   read first; if it does not exist the ref is looked up in
///   `.git/packed-refs`, where git stores refs after `git gc` /
///   `git pack-refs`.
///
/// Returns `None` when `HEAD` cannot be read or the referenced ref is found
/// in neither place (e.g. an unborn branch).
pub fn commit(root: impl AsRef<Path>) -> Option<String> {
    let git_dir = root.as_ref().join(".git");
    let head = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    if !head.starts_with("ref: ") {
        return Some(head.trim().to_string());
    }
    let reference = head.trim().strip_prefix("ref: ")?;

    // A loose ref, when present, takes precedence over a packed entry.
    if let Ok(value) = fs::read_to_string(git_dir.join(reference)) {
        return Some(value.trim().to_string());
    }

    // packed-refs lines look like `<id> <refname>`; `#` lines are headers and
    // `^<id>` lines are peeled tag targets belonging to the previous entry.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == reference)
        .map(|(id, _)| id.trim().to_string())
}
114
115pub fn require_repo(root: impl AsRef<Path>) -> Result<PathBuf> {
116 let root = discover_root(root)?;
117 if !root.exists() {
118 return Err(OkError::Repository(format!(
119 "repository root does not exist: {}",
120 root.display()
121 )));
122 }
123 Ok(root)
124}
125
126pub fn cochange_records(
127 root: impl AsRef<Path>,
128 max_commits: usize,
129 max_files_per_commit: usize,
130) -> Result<Vec<CochangeRecord>> {
131 let history = commit_history(root, max_commits)?;
132 Ok(cochange_records_from_history(
133 &history,
134 max_files_per_commit,
135 ))
136}
137
138pub fn commit_history(root: impl AsRef<Path>, max_commits: usize) -> Result<CommitHistory> {
139 let root = root.as_ref();
140 if !root.join(".git").exists() || max_commits == 0 {
141 return Ok(CommitHistory::empty());
142 }
143 let head = Command::new("git")
144 .arg("-C")
145 .arg(root)
146 .args(["rev-parse", "--verify", "HEAD"])
147 .output()
148 .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
149 if !head.status.success() {
150 return Ok(CommitHistory::empty());
151 }
152 let output = Command::new("git")
153 .arg("-C")
154 .arg(root)
155 .arg("log")
156 .arg(format!("--max-count={max_commits}"))
157 .args([
158 "--no-show-signature",
159 "--no-color",
160 "--no-decorate",
161 "--encoding=UTF-8",
162 "--date=iso-strict",
163 "--find-renames",
164 GIT_COMMIT_FORMAT,
165 "--name-status",
166 "-z",
167 ])
168 .output()
169 .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
170 if !output.status.success() {
171 let stderr = String::from_utf8_lossy(&output.stderr);
172 return Err(OkError::Repository(format!(
173 "git history scan failed: {}",
174 stderr.trim()
175 )));
176 }
177 parse_commit_history(&output.stdout)
178}
179
180pub fn commit_patches(root: impl AsRef<Path>, max_commits: usize) -> Result<Vec<CommitPatch>> {
181 let root = root.as_ref();
182 if !root.join(".git").exists() || max_commits == 0 {
183 return Ok(Vec::new());
184 }
185 let head = Command::new("git")
186 .arg("-C")
187 .arg(root)
188 .args(["rev-parse", "--verify", "HEAD"])
189 .output()
190 .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
191 if !head.status.success() {
192 return Ok(Vec::new());
193 }
194 let output = Command::new("git")
195 .arg("-C")
196 .arg(root)
197 .args(["-c", "core.quotePath=true"])
198 .arg("log")
199 .arg(format!("--max-count={max_commits}"))
200 .args([
201 "--no-show-signature",
202 "--no-color",
203 "--no-decorate",
204 "--encoding=UTF-8",
205 "--find-renames",
206 "--format=%x1e%H%x00",
207 "--patch",
208 "--unified=0",
209 "--no-ext-diff",
210 "--no-textconv",
211 ])
212 .output()
213 .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
214 if !output.status.success() {
215 let stderr = String::from_utf8_lossy(&output.stderr);
216 return Err(OkError::Repository(format!(
217 "git patch scan failed: {}",
218 stderr.trim()
219 )));
220 }
221 parse_commit_patches(&output.stdout)
222}
223
224pub fn diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
225 run_diff_name_status(root, &[])
226}
227
228pub fn diff_name_status_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
229 run_diff_name_status(root, &[since])
230}
231
232pub fn cached_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
233 run_diff_name_status(root, &["--cached"])
234}
235
236pub fn head_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
237 run_diff_name_status(root, &["HEAD"])
238}
239
240pub fn diff_unified_zero(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
241 run_diff_unified_zero(root, &[])
242}
243
244pub fn diff_unified_zero_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
245 run_diff_unified_zero(root, &[since])
246}
247
248fn run_diff_unified_zero(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
249 let root = root.as_ref();
250 if !root.join(".git").exists() {
251 return Ok(Vec::new());
252 }
253 let output = Command::new("git")
254 .arg("-C")
255 .arg(root)
256 .args(["-c", "core.quotePath=true"])
257 .arg("diff")
258 .args(extra_args)
259 .args(["--unified=0", "--no-ext-diff", "--no-textconv"])
260 .output()
261 .map_err(|err| OkError::Repository(format!("git diff failed: {err}")))?;
262 if !output.status.success() {
263 let stderr = String::from_utf8_lossy(&output.stderr);
264 return Err(OkError::Repository(format!(
265 "git diff failed: {}",
266 stderr.trim()
267 )));
268 }
269 parse_unified_zero_diff(&git_text(&output.stdout, "diff output")?)
270}
271
272fn run_diff_name_status(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
273 let root = root.as_ref();
274 if !root.join(".git").exists() {
275 return Ok(Vec::new());
276 }
277 let output = Command::new("git")
278 .arg("-C")
279 .arg(root)
280 .arg("diff")
281 .args(extra_args)
282 .args(["--name-status", "--find-renames"])
283 .output()
284 .map_err(|err| OkError::Repository(format!("git diff --name-status failed: {err}")))?;
285 if !output.status.success() {
286 let stderr = String::from_utf8_lossy(&output.stderr);
287 return Err(OkError::Repository(format!(
288 "git diff --name-status failed: {}",
289 stderr.trim()
290 )));
291 }
292 parse_diff_name_status(&git_text(&output.stdout, "diff name-status output")?)
293}
294
295pub fn cochange_records_from_history(
296 history: &CommitHistory,
297 max_files_per_commit: usize,
298) -> Vec<CochangeRecord> {
299 if max_files_per_commit < 2 {
300 return Vec::new();
301 }
302 let mut files_by_commit = HashMap::<&str, Vec<PathBuf>>::new();
303 for touch in &history.file_touches {
304 files_by_commit
305 .entry(touch.commit_id.0.as_str())
306 .or_default()
307 .push(touch.path.clone());
308 }
309 let mut pairs: HashMap<(PathBuf, PathBuf), CochangeRecord> = HashMap::new();
310 for (idx, commit) in history.commits.iter().enumerate() {
311 let mut files = files_by_commit
312 .remove(commit.id.0.as_str())
313 .unwrap_or_default();
314 files.sort();
315 files.dedup();
316 if files.len() < 2 || files.len() > max_files_per_commit {
317 continue;
318 }
319 let recency_weight = 1.0 / (1.0 + idx as f32 / 25.0);
320 for left in &files {
321 for right in &files {
322 if left == right {
323 continue;
324 }
325 let key = (left.clone(), right.clone());
326 let entry = pairs.entry(key).or_insert_with(|| CochangeRecord {
327 path: left.clone(),
328 cochanged_path: right.clone(),
329 commit_count: 0,
330 recency_weight: 0.0,
331 test_corun: is_test_path(right),
332 commits: Vec::new(),
333 });
334 entry.commit_count += 1;
335 entry.recency_weight += recency_weight;
336 entry.test_corun |= is_test_path(right);
337 if entry.commits.len() < 5 {
338 entry.commits.push(commit.id.0.clone());
339 }
340 }
341 }
342 }
343 let mut records = pairs.into_values().collect::<Vec<_>>();
344 records.sort_by(|a, b| {
345 b.recency_weight
346 .partial_cmp(&a.recency_weight)
347 .unwrap_or(std::cmp::Ordering::Equal)
348 .then_with(|| b.commit_count.cmp(&a.commit_count))
349 .then_with(|| a.path.cmp(&b.path))
350 .then_with(|| a.cochanged_path.cmp(&b.cochanged_path))
351 });
352 records
353}
354
355fn parse_commit_history(raw: &[u8]) -> Result<CommitHistory> {
356 let mut history = CommitHistory::empty();
357 for record in raw
358 .split(|byte| *byte == COMMIT_RECORD_SEPARATOR)
359 .filter(|record| !record.is_empty())
360 {
361 let fields = record.splitn(11, |byte| *byte == 0).collect::<Vec<_>>();
362 if fields.len() != 11 {
363 return Err(OkError::Repository(format!(
364 "git history record has {} fields; expected commit metadata and file statuses",
365 fields.len()
366 )));
367 }
368 let sha = git_text(fields[0], "commit id")?;
369 let parent_ids = git_text(fields[1], "parent commit ids")?
370 .split_whitespace()
371 .map(|id| GitCommitId::new(id.to_string()))
372 .collect::<Vec<_>>();
373 let author = owner(
374 git_text(fields[2], "author name")?,
375 git_text(fields[3], "author email")?,
376 "author",
377 )?;
378 let authored_at = git_timestamp(fields[4], "authored timestamp")?;
379 let committer = owner(
380 git_text(fields[5], "committer name")?,
381 git_text(fields[6], "committer email")?,
382 "committer",
383 )?;
384 let committed_at = git_timestamp(fields[7], "committed timestamp")?;
385 let mut summary = git_text(fields[8], "commit summary")?;
386 let message = git_text(fields[9], "commit message")?
387 .trim_end_matches(['\r', '\n'])
388 .to_string();
389 if summary.trim().is_empty() {
390 summary = message.lines().next().unwrap_or_default().to_string();
391 }
392 let commit_id = GitCommitId::new(sha);
393 let mut touches = parse_file_touches(fields[10], &commit_id, committed_at)?;
394 let file_count = touches.len();
395 history.commits.push(GitCommitRecord {
396 id: commit_id,
397 parent_ids,
398 author,
399 committer: Some(committer),
400 authored_at,
401 committed_at,
402 summary,
403 message,
404 file_count,
405 });
406 history.file_touches.append(&mut touches);
407 }
408 Ok(history)
409}
410
411fn parse_commit_patches(raw: &[u8]) -> Result<Vec<CommitPatch>> {
412 let mut commits = Vec::new();
413 let starts = patch_record_starts(raw);
414 if starts.is_empty() && !raw.is_empty() {
415 return Err(OkError::Repository(
416 "git patch output is missing a commit record".into(),
417 ));
418 }
419 for (index, start) in starts.iter().enumerate() {
420 let end = starts.get(index + 1).copied().unwrap_or(raw.len());
421 let record = &raw[start + 1..end];
422 let Some(metadata_end) = record.iter().position(|byte| *byte == 0) else {
423 return Err(OkError::Repository(
424 "git patch record is missing its commit delimiter".into(),
425 ));
426 };
427 let commit_id = GitCommitId::new(git_text(&record[..metadata_end], "commit id")?);
428 let patch = git_text(&record[metadata_end + 1..], "patch")?;
429 commits.push(CommitPatch {
430 commit_id,
431 files: parse_file_patches(&patch)?,
432 });
433 }
434 Ok(commits)
435}
436
437fn patch_record_starts(raw: &[u8]) -> Vec<usize> {
438 raw.iter()
439 .enumerate()
440 .filter_map(|(index, byte)| {
441 if *byte != COMMIT_RECORD_SEPARATOR {
442 return None;
443 }
444 let commit_start = index + 1;
445 [40, 64].into_iter().find_map(|length| {
446 let commit_end = commit_start + length;
447 (raw.get(commit_end) == Some(&0)
448 && raw
449 .get(commit_start..commit_end)
450 .is_some_and(|commit| commit.iter().all(u8::is_ascii_hexdigit)))
451 .then_some(index)
452 })
453 })
454 .collect()
455}
456
457fn parse_file_patches(patch: &str) -> Result<Vec<FilePatch>> {
458 #[derive(Default)]
459 struct PendingPatch {
460 path: Option<PathBuf>,
461 previous_path: Option<PathBuf>,
462 line_ranges: Vec<LineRange>,
463 }
464
465 fn finish(patches: &mut Vec<FilePatch>, pending: &mut PendingPatch) {
466 if let Some(path) = pending.path.take() {
467 patches.push(FilePatch {
468 path,
469 previous_path: pending.previous_path.take(),
470 line_ranges: std::mem::take(&mut pending.line_ranges),
471 });
472 } else {
473 pending.previous_path = None;
474 pending.line_ranges.clear();
475 }
476 }
477
478 let mut patches = Vec::new();
479 let mut pending = PendingPatch::default();
480 for line in patch.lines() {
481 if line.starts_with("diff --git ") {
482 finish(&mut patches, &mut pending);
483 } else if let Some(value) = line.strip_prefix("rename from ") {
484 pending.previous_path = Some(parse_patch_path(value, None)?);
485 } else if let Some(value) = line.strip_prefix("rename to ") {
486 pending.path = Some(parse_patch_path(value, None)?);
487 } else if let Some(value) = line.strip_prefix("+++ ") {
488 if value != "/dev/null" {
489 pending.path = Some(parse_patch_path(value, Some("b/"))?);
490 }
491 } else if line.starts_with("@@ ") {
492 if let Some(range) = parse_new_hunk_range(line)? {
493 pending.line_ranges.push(range);
494 }
495 }
496 }
497 finish(&mut patches, &mut pending);
498 Ok(patches)
499}
500
501fn parse_diff_name_status(raw: &str) -> Result<Vec<DiffFile>> {
502 raw.lines()
503 .filter(|line| !line.trim().is_empty())
504 .map(|line| {
505 let mut fields = line.split('\t').collect::<Vec<_>>();
506 if fields.len() < 2 {
507 fields = line.split_whitespace().collect();
508 }
509 let status = fields.first().copied().unwrap_or_default();
510 if fields.len() < 2 {
511 return Err(OkError::Repository(format!(
512 "git diff name-status entry is missing a path: `{line}`"
513 )));
514 }
515 let kind = change_kind(status.as_bytes());
516 let rename_score = status
517 .strip_prefix('R')
518 .or_else(|| status.strip_prefix('C'))
519 .and_then(|score| score.parse::<u8>().ok());
520 match kind {
521 GitChangeKind::Renamed | GitChangeKind::Copied => {
522 if fields.len() < 3 {
523 return Err(OkError::Repository(format!(
524 "git diff name-status rename is missing paths: `{line}`"
525 )));
526 }
527 Ok(DiffFile {
528 old_path: Some(parse_patch_path(fields[1], None)?),
529 new_path: Some(parse_patch_path(fields[2], None)?),
530 status: kind,
531 rename_score,
532 hunks: Vec::new(),
533 })
534 }
535 GitChangeKind::Deleted => Ok(DiffFile {
536 old_path: Some(parse_patch_path(fields[1], None)?),
537 new_path: None,
538 status: kind,
539 rename_score,
540 hunks: Vec::new(),
541 }),
542 _ => Ok(DiffFile {
543 old_path: None,
544 new_path: Some(parse_patch_path(fields[1], None)?),
545 status: kind,
546 rename_score,
547 hunks: Vec::new(),
548 }),
549 }
550 })
551 .collect()
552}
553
554fn parse_unified_zero_diff(patch: &str) -> Result<Vec<DiffFile>> {
555 #[derive(Default)]
556 struct PendingDiff {
557 old_path: Option<PathBuf>,
558 new_path: Option<PathBuf>,
559 status: Option<GitChangeKind>,
560 rename_score: Option<u8>,
561 hunks: Vec<DiffHunk>,
562 }
563
564 fn finish(files: &mut Vec<DiffFile>, pending: &mut PendingDiff) {
565 if pending.old_path.is_none() && pending.new_path.is_none() {
566 pending.hunks.clear();
567 pending.status = None;
568 pending.rename_score = None;
569 return;
570 }
571 let status = pending.status.unwrap_or_else(|| {
572 if pending.old_path.is_none() {
573 GitChangeKind::Added
574 } else if pending.new_path.is_none() {
575 GitChangeKind::Deleted
576 } else if pending.old_path != pending.new_path {
577 GitChangeKind::Renamed
578 } else {
579 GitChangeKind::Modified
580 }
581 });
582 files.push(DiffFile {
583 old_path: pending.old_path.take(),
584 new_path: pending.new_path.take(),
585 status,
586 rename_score: pending.rename_score.take(),
587 hunks: std::mem::take(&mut pending.hunks),
588 });
589 }
590
591 let mut files = Vec::new();
592 let mut pending = PendingDiff::default();
593 for line in patch.lines() {
594 if line.starts_with("diff --git ") {
595 finish(&mut files, &mut pending);
596 } else if line.starts_with("new file mode ") {
597 pending.status = Some(GitChangeKind::Added);
598 } else if line.starts_with("deleted file mode ") {
599 pending.status = Some(GitChangeKind::Deleted);
600 } else if let Some(score) = line.strip_prefix("similarity index ") {
601 pending.rename_score = score.trim_end_matches('%').parse::<u8>().ok();
602 } else if let Some(value) = line.strip_prefix("rename from ") {
603 pending.old_path = Some(parse_patch_path(value, None)?);
604 pending.status = Some(GitChangeKind::Renamed);
605 } else if let Some(value) = line.strip_prefix("rename to ") {
606 pending.new_path = Some(parse_patch_path(value, None)?);
607 pending.status = Some(GitChangeKind::Renamed);
608 } else if let Some(value) = line.strip_prefix("--- ") {
609 if value != "/dev/null" {
610 pending.old_path = Some(parse_patch_path(value, Some("a/"))?);
611 }
612 } else if let Some(value) = line.strip_prefix("+++ ") {
613 if value != "/dev/null" {
614 pending.new_path = Some(parse_patch_path(value, Some("b/"))?);
615 }
616 } else if line.starts_with("@@ ") {
617 pending.hunks.push(parse_diff_hunk(line)?);
618 }
619 }
620 finish(&mut files, &mut pending);
621 Ok(files)
622}
623
624fn parse_diff_hunk(header: &str) -> Result<DiffHunk> {
625 let old = header
626 .split_whitespace()
627 .find(|part| part.starts_with('-'))
628 .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
629 let new = header
630 .split_whitespace()
631 .find(|part| part.starts_with('+'))
632 .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
633 Ok(DiffHunk {
634 old_range: parse_hunk_range(old.trim_start_matches('-'))?,
635 new_range: parse_hunk_range(new.trim_start_matches('+'))?,
636 })
637}
638
639fn parse_hunk_range(value: &str) -> Result<Option<LineRange>> {
640 let (start, count) = value.split_once(',').unwrap_or((value, "1"));
641 let start = start.parse::<u32>().map_err(|err| {
642 OkError::Repository(format!("git diff hunk start `{start}` is invalid: {err}"))
643 })?;
644 let count = count.parse::<u32>().map_err(|err| {
645 OkError::Repository(format!("git diff hunk count `{count}` is invalid: {err}"))
646 })?;
647 if count == 0 {
648 return Ok(None);
649 }
650 Ok(Some(LineRange {
651 start,
652 end: start.saturating_add(count - 1),
653 }))
654}
655
656fn parse_new_hunk_range(header: &str) -> Result<Option<LineRange>> {
657 let marker = header
658 .split_whitespace()
659 .find(|part| part.starts_with('+'))
660 .ok_or_else(|| OkError::Repository(format!("git patch hunk is malformed: `{header}`")))?;
661 let value = marker.trim_start_matches('+');
662 let (start, count) = value.split_once(',').unwrap_or((value, "1"));
663 let start = start.parse::<u32>().map_err(|err| {
664 OkError::Repository(format!("git patch hunk start `{start}` is invalid: {err}"))
665 })?;
666 let count = count.parse::<u32>().map_err(|err| {
667 OkError::Repository(format!("git patch hunk count `{count}` is invalid: {err}"))
668 })?;
669 if count == 0 {
670 return Ok(None);
671 }
672 Ok(Some(LineRange {
673 start,
674 end: start.saturating_add(count - 1),
675 }))
676}
677
678fn parse_patch_path(value: &str, prefix: Option<&str>) -> Result<PathBuf> {
679 let decoded = if value.starts_with('"') {
680 decode_git_quoted_path(value)?
681 } else {
682 value.to_string()
683 };
684 let decoded = prefix
685 .and_then(|prefix| decoded.strip_prefix(prefix))
686 .unwrap_or(&decoded);
687 Ok(PathBuf::from(decoded))
688}
689
690fn decode_git_quoted_path(value: &str) -> Result<String> {
691 let Some(inner) = value
692 .strip_prefix('"')
693 .and_then(|value| value.strip_suffix('"'))
694 else {
695 return Err(OkError::Repository(format!(
696 "git patch path has invalid quoting: `{value}`"
697 )));
698 };
699 let mut bytes = Vec::with_capacity(inner.len());
700 let mut chars = inner.as_bytes().iter().copied().peekable();
701 while let Some(byte) = chars.next() {
702 if byte != b'\\' {
703 bytes.push(byte);
704 continue;
705 }
706 let escaped = chars.next().ok_or_else(|| {
707 OkError::Repository(format!("git patch path has a trailing escape: `{value}`"))
708 })?;
709 match escaped {
710 b'\\' | b'"' => bytes.push(escaped),
711 b'a' => bytes.push(0x07),
712 b'b' => bytes.push(0x08),
713 b't' => bytes.push(b'\t'),
714 b'n' => bytes.push(b'\n'),
715 b'v' => bytes.push(0x0b),
716 b'f' => bytes.push(0x0c),
717 b'r' => bytes.push(b'\r'),
718 b'0'..=b'7' => {
719 let mut octal = vec![escaped];
720 for _ in 0..2 {
721 if chars.peek().is_some_and(|byte| matches!(byte, b'0'..=b'7')) {
722 octal.push(chars.next().expect("peeked octal byte"));
723 } else {
724 break;
725 }
726 }
727 let decoded = std::str::from_utf8(&octal)
728 .ok()
729 .and_then(|value| u8::from_str_radix(value, 8).ok())
730 .ok_or_else(|| {
731 OkError::Repository("git patch path contains invalid octal escape".into())
732 })?;
733 bytes.push(decoded);
734 }
735 other => bytes.push(other),
736 }
737 }
738 String::from_utf8(bytes)
739 .map_err(|err| OkError::Repository(format!("git patch path is not UTF-8: {err}")))
740}
741
742fn parse_file_touches(
743 raw: &[u8],
744 commit_id: &GitCommitId,
745 touched_at: DateTime<Utc>,
746) -> Result<Vec<GitFileTouch>> {
747 let mut tokens = raw.split(|byte| *byte == 0);
748 let mut touches = Vec::new();
749 while let Some(status) = next_status(&mut tokens) {
750 let change_kind = change_kind(status);
751 let rename_or_copy = matches!(change_kind, GitChangeKind::Renamed | GitChangeKind::Copied);
752 let first_path = next_path(&mut tokens, commit_id, status)?;
753 let (path, previous_path) = if rename_or_copy {
754 let current_path = next_path(&mut tokens, commit_id, status)?;
755 (current_path, Some(first_path))
756 } else {
757 (first_path, None)
758 };
759 let id = HistoryRecordId::new(format!("file-touch:{}:{}", commit_id.0, touches.len()));
760 touches.push(GitFileTouch {
761 id,
762 commit_id: commit_id.clone(),
763 path,
764 previous_path,
765 change_kind,
766 additions: None,
767 deletions: None,
768 touched_at,
769 });
770 }
771 Ok(touches)
772}
773
774fn next_status<'a>(tokens: &mut impl Iterator<Item = &'a [u8]>) -> Option<&'a [u8]> {
775 tokens
776 .map(trim_status_prefix)
777 .find(|token| !token.is_empty())
778}
779
780fn next_path<'a>(
781 tokens: &mut impl Iterator<Item = &'a [u8]>,
782 commit_id: &GitCommitId,
783 status: &[u8],
784) -> Result<PathBuf> {
785 let path = tokens.find(|token| !token.is_empty()).ok_or_else(|| {
786 OkError::Repository(format!(
787 "git history record for commit `{commit_id}` is missing a path after status `{}`",
788 String::from_utf8_lossy(status)
789 ))
790 })?;
791 Ok(PathBuf::from(git_text(path, "changed path")?))
792}
793
/// Strips every leading carriage-return and line-feed byte from `value`.
fn trim_status_prefix(value: &[u8]) -> &[u8] {
    let skipped = value
        .iter()
        .take_while(|&&byte| byte == b'\r' || byte == b'\n')
        .count();
    &value[skipped..]
}
803
804fn change_kind(status: &[u8]) -> GitChangeKind {
805 match status.first().copied() {
806 Some(b'A') => GitChangeKind::Added,
807 Some(b'M') => GitChangeKind::Modified,
808 Some(b'D') => GitChangeKind::Deleted,
809 Some(b'R') => GitChangeKind::Renamed,
810 Some(b'C') => GitChangeKind::Copied,
811 Some(b'T') => GitChangeKind::TypeChanged,
812 _ => GitChangeKind::Unknown,
813 }
814}
815
816fn owner(name: String, email: String, role: &str) -> Result<Owner> {
817 let name = name.trim().to_string();
818 let email = email.trim().to_string();
819 let name = if name.is_empty() { email.clone() } else { name };
820 if name.is_empty() {
821 return Err(OkError::Repository(format!(
822 "git history {role} identity is empty"
823 )));
824 }
825 Ok(Owner {
826 name,
827 email: (!email.is_empty()).then_some(email),
828 })
829}
830
831fn git_timestamp(raw: &[u8], field: &str) -> Result<DateTime<Utc>> {
832 let value = git_text(raw, field)?;
833 DateTime::parse_from_rfc3339(&value)
834 .map(|timestamp| timestamp.with_timezone(&Utc))
835 .map_err(|err| {
836 OkError::Repository(format!("git history {field} `{value}` is invalid: {err}"))
837 })
838}
839
840fn git_text(raw: &[u8], field: &str) -> Result<String> {
841 String::from_utf8(raw.to_vec()).map_err(|err| {
842 OkError::Repository(format!("git history {field} is not valid UTF-8: {err}"))
843 })
844}
845
/// Heuristically decides whether `path` is a test file.
///
/// The check is case-insensitive and matches when
/// * any directory component is named `test` or `tests` — including a
///   top-level one such as `tests/integration.rs`, which has no leading
///   slash in repo-relative paths — or
/// * the path ends in one of the known test-file suffixes (`_test.rs`,
///   `_test.go`, `.test.ts`, `.spec.ts`, `test.java`, `tests.java`).
fn is_test_path(path: &Path) -> bool {
    const TEST_SUFFIXES: [&str; 6] = [
        "_test.rs",
        "_test.go",
        ".test.ts",
        ".spec.ts",
        "test.java",
        "tests.java",
    ];

    let value = path.to_string_lossy().to_ascii_lowercase();
    // Everything before the last `/` is directories; the last part is the file.
    let in_test_dir = value.rsplit_once('/').is_some_and(|(dirs, _file)| {
        dirs.split('/').any(|dir| matches!(dir, "test" | "tests"))
    });
    in_test_dir || TEST_SUFFIXES.iter().any(|suffix| value.ends_with(suffix))
}
857
858#[cfg(test)]
859mod tests {
860 use super::{
861 cochange_records, commit_history, commit_patches, parse_commit_patches,
862 parse_diff_name_status, parse_file_patches, parse_unified_zero_diff,
863 };
864 use open_kioku_core::GitChangeKind;
865 use std::fs;
866 use std::path::Path;
867 use std::process::Command;
868
869 #[test]
870 fn cochange_records_apply_recency_and_test_corun() {
871 let dir = tempfile::tempdir().unwrap();
872 run(dir.path(), &["init"]);
873 run(dir.path(), &["config", "user.email", "test@example.com"]);
874 run(dir.path(), &["config", "user.name", "Test User"]);
875
876 write(dir.path(), "src/old.rs", "fn old() {}\n");
877 write(
878 dir.path(),
879 "tests/old_test.rs",
880 "#[test] fn old_test() {}\n",
881 );
882 run(dir.path(), &["add", "."]);
883 run(dir.path(), &["commit", "-m", "old pair"]);
884
885 write(dir.path(), "src/new.rs", "fn new() {}\n");
886 write(
887 dir.path(),
888 "tests/new_test.rs",
889 "#[test] fn new_test() {}\n",
890 );
891 run(dir.path(), &["add", "."]);
892 run(dir.path(), &["commit", "-m", "new pair"]);
893
894 let records = cochange_records(dir.path(), 20, 10).unwrap();
895 let new_pair = records
896 .iter()
897 .find(|record| {
898 record.path == std::path::Path::new("src/new.rs")
899 && record.cochanged_path == std::path::Path::new("tests/new_test.rs")
900 })
901 .unwrap();
902 let old_pair = records
903 .iter()
904 .find(|record| {
905 record.path == std::path::Path::new("src/old.rs")
906 && record.cochanged_path == std::path::Path::new("tests/old_test.rs")
907 })
908 .unwrap();
909
910 assert!(new_pair.test_corun);
911 assert!(new_pair.recency_weight > old_pair.recency_weight);
912 assert_eq!(new_pair.commit_count, 1);
913 }
914
915 #[test]
916 fn commit_history_respects_window_and_keeps_every_file_touch() {
917 let dir = initialized_repo();
918 write(dir.path(), "src/old.rs", "fn old() {}\n");
919 commit_all(dir.path(), "old");
920 write(dir.path(), "src/a.rs", "fn a() {}\n");
921 write(dir.path(), "src/b.rs", "fn b() {}\n");
922 write(dir.path(), "tests/a_test.rs", "#[test] fn a() {}\n");
923 commit_all(dir.path(), "multi-file change");
924
925 let history = commit_history(dir.path(), 1).unwrap();
926
927 assert_eq!(history.commits.len(), 1);
928 assert_eq!(history.commits[0].summary, "multi-file change");
929 assert_eq!(history.commits[0].author.name, "Test User");
930 assert_eq!(
931 history.commits[0].author.email.as_deref(),
932 Some("test@example.com")
933 );
934 assert_eq!(history.commits[0].file_count, 3);
935 assert_eq!(history.file_touches.len(), 3);
936 assert!(history
937 .file_touches
938 .iter()
939 .all(|touch| touch.commit_id == history.commits[0].id));
940 }
941
942 #[test]
943 fn commit_history_captures_renames() {
944 let dir = initialized_repo();
945 write(dir.path(), "src/old.rs", "fn renamed() {}\n");
946 commit_all(dir.path(), "add old path");
947 run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
948 commit_all(dir.path(), "rename path");
949
950 let history = commit_history(dir.path(), 1).unwrap();
951 let touch = history.file_touches.first().unwrap();
952
953 assert_eq!(touch.change_kind, GitChangeKind::Renamed);
954 assert_eq!(
955 touch.previous_path.as_deref(),
956 Some(Path::new("src/old.rs"))
957 );
958 assert_eq!(touch.path, Path::new("src/new.rs"));
959 }
960
961 #[test]
962 fn commit_history_handles_empty_and_shallow_repositories() {
963 let empty = initialized_repo();
964 assert_eq!(
965 commit_history(empty.path(), 10).unwrap(),
966 super::CommitHistory::empty()
967 );
968
969 let origin = initialized_repo();
970 write(origin.path(), "src/one.rs", "fn one() {}\n");
971 commit_all(origin.path(), "one");
972 write(origin.path(), "src/two.rs", "fn two() {}\n");
973 commit_all(origin.path(), "two");
974
975 let clone_parent = tempfile::tempdir().unwrap();
976 let shallow = clone_parent.path().join("shallow");
977 let source = format!("file://{}", origin.path().canonicalize().unwrap().display());
978 let status = Command::new("git")
979 .args(["clone", "--quiet", "--depth", "1"])
980 .arg(source)
981 .arg(&shallow)
982 .status()
983 .unwrap();
984 assert!(status.success());
985
986 let history = commit_history(&shallow, 10).unwrap();
987 assert_eq!(history.commits.len(), 1);
988 assert_eq!(history.commits[0].summary, "two");
989 }
990
991 #[test]
992 fn commit_patches_capture_zero_context_line_ranges_and_renames() {
993 let dir = initialized_repo();
994 write(
995 dir.path(),
996 "src/old.rs",
997 "fn alpha() {\n one();\n}\n\nfn beta() {\n two();\n}\n",
998 );
999 commit_all(dir.path(), "add symbols");
1000 run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
1001 write(
1002 dir.path(),
1003 "src/new.rs",
1004 "fn alpha() {\n changed();\n}\n\nfn beta() {\n two();\n added();\n}\n",
1005 );
1006 commit_all(dir.path(), "rename and modify");
1007
1008 let patches = commit_patches(dir.path(), 1).unwrap();
1009
1010 assert_eq!(patches.len(), 1);
1011 assert_eq!(patches[0].files.len(), 1);
1012 let file = &patches[0].files[0];
1013 assert_eq!(file.path, Path::new("src/new.rs"));
1014 assert_eq!(file.previous_path.as_deref(), Some(Path::new("src/old.rs")));
1015 assert_eq!(
1016 file.line_ranges,
1017 vec![
1018 open_kioku_core::LineRange { start: 2, end: 2 },
1019 open_kioku_core::LineRange { start: 7, end: 7 }
1020 ]
1021 );
1022 }
1023
/// Parses a four-line name-status listing (added, modified, deleted, renamed)
/// and checks the status of every entry plus the paths and rename score that
/// the individual entries are expected to carry.
#[test]
fn diff_name_status_parser_captures_added_modified_deleted_and_renamed() {
    let listing = "A\tsrc/new.rs\n\
                   M\tsrc/lib.rs\n\
                   D\tsrc/old.rs\n\
                   R087\tsrc/before.rs\tsrc/after.rs\n";

    let files = parse_diff_name_status(listing).unwrap();
    assert_eq!(files.len(), 4);

    let added = &files[0];
    assert_eq!(added.status, GitChangeKind::Added);
    assert_eq!(added.new_path.as_deref(), Some(Path::new("src/new.rs")));

    let modified = &files[1];
    assert_eq!(modified.status, GitChangeKind::Modified);

    let deleted = &files[2];
    assert_eq!(deleted.status, GitChangeKind::Deleted);
    assert_eq!(deleted.old_path.as_deref(), Some(Path::new("src/old.rs")));

    // The `R087` entry carries both paths and the numeric score.
    let renamed = &files[3];
    assert_eq!(renamed.status, GitChangeKind::Renamed);
    assert_eq!(renamed.rename_score, Some(87));
    assert_eq!(renamed.old_path.as_deref(), Some(Path::new("src/before.rs")));
    assert_eq!(renamed.new_path.as_deref(), Some(Path::new("src/after.rs")));
}
1051
/// Parses a diff containing a renamed-and-edited file followed by a deleted
/// file, then checks the status, rename score, paths, per-hunk old/new ranges
/// and the result of `changed_line_ranges` for the renamed file, and the
/// single hunk of the deleted file.
#[test]
fn unified_zero_diff_parser_captures_old_new_hunks_and_changed_ranges() {
    let diff = "diff --git a/src/old.rs b/src/new.rs\n\
                similarity index 92%\n\
                rename from src/old.rs\n\
                rename to src/new.rs\n\
                --- a/src/old.rs\n\
                +++ b/src/new.rs\n\
                @@ -2 +2 @@\n\
                -old();\n\
                +new();\n\
                @@ -8,0 +9,2 @@\n\
                +added();\n\
                +again();\n\
                diff --git a/src/deleted.rs b/src/deleted.rs\n\
                deleted file mode 100644\n\
                --- a/src/deleted.rs\n\
                +++ /dev/null\n\
                @@ -1,3 +0,0 @@\n";

    let files = parse_unified_zero_diff(diff).unwrap();
    assert_eq!(files.len(), 2);

    // First file: rename with one replaced line and one pure insertion.
    let renamed = &files[0];
    assert_eq!(renamed.status, GitChangeKind::Renamed);
    assert_eq!(renamed.rename_score, Some(92));
    assert_eq!(renamed.old_path.as_deref(), Some(Path::new("src/old.rs")));
    assert_eq!(renamed.new_path.as_deref(), Some(Path::new("src/new.rs")));

    let replaced_line = open_kioku_core::LineRange { start: 2, end: 2 };
    let inserted_lines = open_kioku_core::LineRange { start: 9, end: 10 };

    let expected_hunks = vec![
        super::DiffHunk {
            old_range: Some(replaced_line.clone()),
            new_range: Some(replaced_line.clone()),
        },
        // `-8,0` removes nothing, so the hunk has no old range.
        super::DiffHunk {
            old_range: None,
            new_range: Some(inserted_lines.clone()),
        },
    ];
    assert_eq!(renamed.hunks, expected_hunks);
    assert_eq!(
        renamed.changed_line_ranges(),
        vec![replaced_line, inserted_lines]
    );

    // Second file: deletion, so only the old side of the hunk has a range.
    let removed = &files[1];
    assert_eq!(removed.status, GitChangeKind::Deleted);

    let only_hunk = &removed.hunks[0];
    assert_eq!(
        only_hunk.old_range,
        Some(open_kioku_core::LineRange { start: 1, end: 3 })
    );
    assert_eq!(only_hunk.new_range, None);
}
1107
/// Parses a patch whose paths are quoted with an octal-escaped space and
/// whose first hunk only removes lines; expects the decoded path and only the
/// line range of the second hunk's new side.
#[test]
fn patch_parser_decodes_quoted_paths_and_ignores_deletion_ranges() {
    let raw_patch = "diff --git \"a/src/space\\040name.rs\" \"b/src/space\\040name.rs\"\n\
                     --- \"a/src/space\\040name.rs\"\n\
                     +++ \"b/src/space\\040name.rs\"\n\
                     @@ -3,2 +3,0 @@\n\
                     @@ -8 +6,2 @@\n";

    let patches = parse_file_patches(raw_patch).unwrap();
    assert_eq!(patches.len(), 1);

    let patch = &patches[0];
    // `\040` is the octal escape for a space in the quoted path.
    assert_eq!(patch.path, Path::new("src/space name.rs"));

    // The `+3,0` hunk adds nothing, so only `+6,2` yields a range.
    let expected_ranges = vec![open_kioku_core::LineRange { start: 6, end: 7 }];
    assert_eq!(patch.line_ranges, expected_ranges);
}
1126
/// Builds raw commit-patch output in which a `0x1e` byte appears inside an
/// added diff line, and checks that the parser still yields one commit with
/// one file instead of splitting the record at that byte.
#[test]
fn patch_parser_ignores_record_separator_bytes_inside_diff_content() {
    let mut raw = Vec::new();
    // Record start: separator byte, 40-character commit id, NUL, then the diff.
    raw.extend_from_slice(
        b"\x1e0123456789abcdef0123456789abcdef01234567\x00diff --git a/a.rs b/a.rs\n\
          +++ b/a.rs\n\
          @@ -0,0 +1 @@\n\
          +embedded ",
    );
    // The same separator byte, this time in the middle of an added line.
    raw.push(0x1e);
    raw.extend_from_slice(b" byte\n");

    let patches = parse_commit_patches(&raw).unwrap();
    assert_eq!(patches.len(), 1);

    let files = &patches[0].files;
    assert_eq!(files.len(), 1);
    assert_eq!(files[0].path, Path::new("a.rs"));
}
1143
1144 fn initialized_repo() -> tempfile::TempDir {
1145 let dir = tempfile::tempdir().unwrap();
1146 run(dir.path(), &["init", "--quiet"]);
1147 run(dir.path(), &["config", "user.email", "test@example.com"]);
1148 run(dir.path(), &["config", "user.name", "Test User"]);
1149 run(dir.path(), &["config", "commit.gpgsign", "false"]);
1150 dir
1151 }
1152
1153 fn commit_all(root: &Path, message: &str) {
1154 run(root, &["add", "."]);
1155 run(root, &["commit", "--quiet", "-m", message]);
1156 }
1157
/// Writes `content` to `path` (relative to `root`), creating any missing
/// parent directories first. Panics on any filesystem error.
fn write(root: &Path, path: &str, content: &str) {
    let target = root.join(path);
    let parent = target.parent().unwrap();
    fs::create_dir_all(parent).unwrap();
    fs::write(&target, content).unwrap();
}
1163
/// Runs `git -C <root> <args...>` and waits for it to finish.
///
/// Panics if the process cannot be spawned or exits unsuccessfully.
fn run(root: &Path, args: &[&str]) {
    let mut git = Command::new("git");
    git.arg("-C").arg(root).args(args);

    let succeeded = git.status().unwrap().success();
    assert!(succeeded, "git {args:?} failed");
}
1173}