1use chrono::{DateTime, Utc};
2use open_kioku_core::{
3 GitChangeKind, GitCommitId, GitCommitRecord, GitFileTouch, HistoryRecordId, LineRange, Owner,
4};
5use open_kioku_errors::{OkError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::process::Command;
11
/// Byte that introduces every commit record in the `git log` output requested by this
/// module (ASCII record separator, emitted by the `%x1e` placeholder in the format strings).
const COMMIT_RECORD_SEPARATOR: u8 = 0x1e;
/// `--format` argument used by `commit_history`.
///
/// Each record starts with the record separator and then carries ten NUL-terminated
/// fields, in this order: commit hash, parent hashes, author name, author email, author
/// date, committer name, committer email, committer date, subject and raw body.
/// Whatever follows the tenth NUL is the `--name-status -z` listing for that commit.
const GIT_COMMIT_FORMAT: &str =
    "--format=%x1e%H%x00%P%x00%an%x00%ae%x00%aI%x00%cn%x00%ce%x00%cI%x00%s%x00%B%x00";
15
16#[derive(Debug, Clone, PartialEq)]
17pub struct CommitHistory {
18 pub commits: Vec<GitCommitRecord>,
19 pub file_touches: Vec<GitFileTouch>,
20}
21
22impl CommitHistory {
23 pub fn empty() -> Self {
24 Self {
25 commits: Vec::new(),
26 file_touches: Vec::new(),
27 }
28 }
29}
30
/// Directed "changed together" relation between two paths, aggregated over a commit window.
#[derive(Debug, Clone, PartialEq)]
pub struct CochangeRecord {
    /// Path the relation is reported for.
    pub path: PathBuf,
    /// Path that was modified in the same commit(s) as `path`.
    pub cochanged_path: PathBuf,
    /// Number of commits in the window that touched both paths.
    pub commit_count: usize,
    /// Sum of the per-commit recency weights; commits nearer the start of the history
    /// listing contribute more.
    pub recency_weight: f32,
    /// Whether `cochanged_path` looks like a test file.
    pub test_corun: bool,
    /// Ids of the contributing commits (only the first few are retained).
    pub commits: Vec<String>,
}
40
/// Per-file patch summary of a single commit, as parsed from `git log --patch` output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitPatch {
    /// Commit the patch belongs to.
    pub commit_id: GitCommitId,
    /// Files of the commit for which a post-change path could be determined.
    pub files: Vec<FilePatch>,
}
46
/// Changed line ranges of one file inside a commit patch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Path of the file after the change (taken from `rename to` or the `+++` header).
    pub path: PathBuf,
    /// Path before the change when the patch carries a `rename from` header.
    pub previous_path: Option<PathBuf>,
    /// New-side line ranges of every hunk that still has lines on the new side.
    pub line_ranges: Vec<LineRange>,
}
53
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55pub struct DiffFile {
56 pub old_path: Option<PathBuf>,
57 pub new_path: Option<PathBuf>,
58 pub status: GitChangeKind,
59 pub rename_score: Option<u8>,
60 pub hunks: Vec<DiffHunk>,
61}
62
63impl DiffFile {
64 pub fn changed_line_ranges(&self) -> Vec<LineRange> {
65 self.hunks
66 .iter()
67 .filter_map(|hunk| hunk.new_range.clone())
68 .collect()
69 }
70}
71
/// Line ranges described by a single `@@ -old +new @@` hunk header.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// Lines covered on the old side; `None` when the header's old-side count is zero.
    pub old_range: Option<LineRange>,
    /// Lines covered on the new side; `None` when the header's new-side count is zero.
    pub new_range: Option<LineRange>,
}
77
78pub fn discover_root(start: impl AsRef<Path>) -> Result<PathBuf> {
79 let mut current = start.as_ref().canonicalize()?;
80 loop {
81 if current.join(".git").exists() || current.join("ok.toml").exists() {
82 return Ok(current);
83 }
84 if !current.pop() {
85 return Ok(start.as_ref().canonicalize()?);
86 }
87 }
88}
89
/// Returns the branch named by `<root>/.git/HEAD`.
///
/// Yields `None` when the file cannot be read or when it does not start with
/// `ref: refs/heads/` (for example a detached HEAD).
pub fn branch(root: impl AsRef<Path>) -> Option<String> {
    let head_file = root.as_ref().join(".git/HEAD");
    let contents = fs::read_to_string(head_file).ok()?;
    contents
        .strip_prefix("ref: refs/heads/")
        .map(|name| name.trim().to_string())
}
97
/// Resolves the commit id that `<root>/.git/HEAD` currently points at.
///
/// * A HEAD that does not start with `ref: ` is treated as detached and its trimmed
///   content is returned as-is.
/// * A symbolic HEAD is resolved through the loose ref file `<root>/.git/<ref>` first.
///   When that file is missing, `.git/packed-refs` is consulted, because git moves refs
///   there when it packs them and removes the loose files.
///
/// Returns `None` when HEAD cannot be read or the reference cannot be resolved through
/// either location (for example on a branch without commits).
pub fn commit(root: impl AsRef<Path>) -> Option<String> {
    let git_dir = root.as_ref().join(".git");
    let head = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    if !head.starts_with("ref: ") {
        return Some(head.trim().to_string());
    }
    let reference = head.trim().strip_prefix("ref: ")?;

    // Loose refs take precedence over packed ones, mirroring git's own lookup order.
    if let Ok(value) = fs::read_to_string(git_dir.join(reference)) {
        return Some(value.trim().to_string());
    }

    // packed-refs holds `<id> <refname>` lines plus `#` header and `^` peeled-tag lines.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed.lines().find_map(|line| {
        let (id, name) = line.split_once(' ')?;
        (name.trim_end() == reference && !id.starts_with(['#', '^'])).then(|| id.to_string())
    })
}
108
109pub fn require_repo(root: impl AsRef<Path>) -> Result<PathBuf> {
110 let root = discover_root(root)?;
111 if !root.exists() {
112 return Err(OkError::Repository(format!(
113 "repository root does not exist: {}",
114 root.display()
115 )));
116 }
117 Ok(root)
118}
119
120pub fn cochange_records(
121 root: impl AsRef<Path>,
122 max_commits: usize,
123 max_files_per_commit: usize,
124) -> Result<Vec<CochangeRecord>> {
125 let history = commit_history(root, max_commits)?;
126 Ok(cochange_records_from_history(
127 &history,
128 max_files_per_commit,
129 ))
130}
131
132pub fn commit_history(root: impl AsRef<Path>, max_commits: usize) -> Result<CommitHistory> {
133 let root = root.as_ref();
134 if !root.join(".git").exists() || max_commits == 0 {
135 return Ok(CommitHistory::empty());
136 }
137 let head = Command::new("git")
138 .arg("-C")
139 .arg(root)
140 .args(["rev-parse", "--verify", "HEAD"])
141 .output()
142 .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
143 if !head.status.success() {
144 return Ok(CommitHistory::empty());
145 }
146 let output = Command::new("git")
147 .arg("-C")
148 .arg(root)
149 .arg("log")
150 .arg(format!("--max-count={max_commits}"))
151 .args([
152 "--no-show-signature",
153 "--no-color",
154 "--no-decorate",
155 "--encoding=UTF-8",
156 "--date=iso-strict",
157 "--find-renames",
158 GIT_COMMIT_FORMAT,
159 "--name-status",
160 "-z",
161 ])
162 .output()
163 .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
164 if !output.status.success() {
165 let stderr = String::from_utf8_lossy(&output.stderr);
166 return Err(OkError::Repository(format!(
167 "git history scan failed: {}",
168 stderr.trim()
169 )));
170 }
171 parse_commit_history(&output.stdout)
172}
173
174pub fn commit_patches(root: impl AsRef<Path>, max_commits: usize) -> Result<Vec<CommitPatch>> {
175 let root = root.as_ref();
176 if !root.join(".git").exists() || max_commits == 0 {
177 return Ok(Vec::new());
178 }
179 let head = Command::new("git")
180 .arg("-C")
181 .arg(root)
182 .args(["rev-parse", "--verify", "HEAD"])
183 .output()
184 .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
185 if !head.status.success() {
186 return Ok(Vec::new());
187 }
188 let output = Command::new("git")
189 .arg("-C")
190 .arg(root)
191 .args(["-c", "core.quotePath=true"])
192 .arg("log")
193 .arg(format!("--max-count={max_commits}"))
194 .args([
195 "--no-show-signature",
196 "--no-color",
197 "--no-decorate",
198 "--encoding=UTF-8",
199 "--find-renames",
200 "--format=%x1e%H%x00",
201 "--patch",
202 "--unified=0",
203 "--no-ext-diff",
204 "--no-textconv",
205 ])
206 .output()
207 .map_err(|err| OkError::Repository(format!("git patch scan failed: {err}")))?;
208 if !output.status.success() {
209 let stderr = String::from_utf8_lossy(&output.stderr);
210 return Err(OkError::Repository(format!(
211 "git patch scan failed: {}",
212 stderr.trim()
213 )));
214 }
215 parse_commit_patches(&output.stdout)
216}
217
/// Lists changed files via `git diff --name-status --find-renames`, passing no extra
/// arguments to `git diff`.
///
/// Returns an empty list when `root` has no `.git` entry.
pub fn diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &[])
}
221
/// Lists changed files via `git diff <since> --name-status --find-renames`.
///
/// `since` is handed to `git diff` verbatim as its only extra argument.
/// Returns an empty list when `root` has no `.git` entry.
pub fn diff_name_status_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &[since])
}
225
/// Lists changed files via `git diff --cached --name-status --find-renames`.
///
/// Returns an empty list when `root` has no `.git` entry.
pub fn cached_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &["--cached"])
}
229
/// Lists changed files via `git diff HEAD --name-status --find-renames`.
///
/// Returns an empty list when `root` has no `.git` entry.
pub fn head_diff_name_status(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_name_status(root, &["HEAD"])
}
233
/// Produces per-file hunks from `git diff --unified=0`, passing no extra arguments to
/// `git diff`.
///
/// Returns an empty list when `root` has no `.git` entry.
pub fn diff_unified_zero(root: impl AsRef<Path>) -> Result<Vec<DiffFile>> {
    run_diff_unified_zero(root, &[])
}
237
/// Produces per-file hunks from `git diff <since> --unified=0`.
///
/// `since` is handed to `git diff` verbatim as its only extra argument.
/// Returns an empty list when `root` has no `.git` entry.
pub fn diff_unified_zero_since(root: impl AsRef<Path>, since: &str) -> Result<Vec<DiffFile>> {
    run_diff_unified_zero(root, &[since])
}
241
242fn run_diff_unified_zero(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
243 let root = root.as_ref();
244 if !root.join(".git").exists() {
245 return Ok(Vec::new());
246 }
247 let output = Command::new("git")
248 .arg("-C")
249 .arg(root)
250 .args(["-c", "core.quotePath=true"])
251 .arg("diff")
252 .args(extra_args)
253 .args(["--unified=0", "--no-ext-diff", "--no-textconv"])
254 .output()
255 .map_err(|err| OkError::Repository(format!("git diff failed: {err}")))?;
256 if !output.status.success() {
257 let stderr = String::from_utf8_lossy(&output.stderr);
258 return Err(OkError::Repository(format!(
259 "git diff failed: {}",
260 stderr.trim()
261 )));
262 }
263 parse_unified_zero_diff(&git_text(&output.stdout, "diff output")?)
264}
265
266fn run_diff_name_status(root: impl AsRef<Path>, extra_args: &[&str]) -> Result<Vec<DiffFile>> {
267 let root = root.as_ref();
268 if !root.join(".git").exists() {
269 return Ok(Vec::new());
270 }
271 let output = Command::new("git")
272 .arg("-C")
273 .arg(root)
274 .arg("diff")
275 .args(extra_args)
276 .args(["--name-status", "--find-renames"])
277 .output()
278 .map_err(|err| OkError::Repository(format!("git diff --name-status failed: {err}")))?;
279 if !output.status.success() {
280 let stderr = String::from_utf8_lossy(&output.stderr);
281 return Err(OkError::Repository(format!(
282 "git diff --name-status failed: {}",
283 stderr.trim()
284 )));
285 }
286 parse_diff_name_status(&git_text(&output.stdout, "diff name-status output")?)
287}
288
289pub fn cochange_records_from_history(
290 history: &CommitHistory,
291 max_files_per_commit: usize,
292) -> Vec<CochangeRecord> {
293 if max_files_per_commit < 2 {
294 return Vec::new();
295 }
296 let mut files_by_commit = HashMap::<&str, Vec<PathBuf>>::new();
297 for touch in &history.file_touches {
298 files_by_commit
299 .entry(touch.commit_id.0.as_str())
300 .or_default()
301 .push(touch.path.clone());
302 }
303 let mut pairs: HashMap<(PathBuf, PathBuf), CochangeRecord> = HashMap::new();
304 for (idx, commit) in history.commits.iter().enumerate() {
305 let mut files = files_by_commit
306 .remove(commit.id.0.as_str())
307 .unwrap_or_default();
308 files.sort();
309 files.dedup();
310 if files.len() < 2 || files.len() > max_files_per_commit {
311 continue;
312 }
313 let recency_weight = 1.0 / (1.0 + idx as f32 / 25.0);
314 for left in &files {
315 for right in &files {
316 if left == right {
317 continue;
318 }
319 let key = (left.clone(), right.clone());
320 let entry = pairs.entry(key).or_insert_with(|| CochangeRecord {
321 path: left.clone(),
322 cochanged_path: right.clone(),
323 commit_count: 0,
324 recency_weight: 0.0,
325 test_corun: is_test_path(right),
326 commits: Vec::new(),
327 });
328 entry.commit_count += 1;
329 entry.recency_weight += recency_weight;
330 entry.test_corun |= is_test_path(right);
331 if entry.commits.len() < 5 {
332 entry.commits.push(commit.id.0.clone());
333 }
334 }
335 }
336 }
337 let mut records = pairs.into_values().collect::<Vec<_>>();
338 records.sort_by(|a, b| {
339 b.recency_weight
340 .partial_cmp(&a.recency_weight)
341 .unwrap_or(std::cmp::Ordering::Equal)
342 .then_with(|| b.commit_count.cmp(&a.commit_count))
343 .then_with(|| a.path.cmp(&b.path))
344 .then_with(|| a.cochanged_path.cmp(&b.cochanged_path))
345 });
346 records
347}
348
349fn parse_commit_history(raw: &[u8]) -> Result<CommitHistory> {
350 let mut history = CommitHistory::empty();
351 for record in raw
352 .split(|byte| *byte == COMMIT_RECORD_SEPARATOR)
353 .filter(|record| !record.is_empty())
354 {
355 let fields = record.splitn(11, |byte| *byte == 0).collect::<Vec<_>>();
356 if fields.len() != 11 {
357 return Err(OkError::Repository(format!(
358 "git history record has {} fields; expected commit metadata and file statuses",
359 fields.len()
360 )));
361 }
362 let sha = git_text(fields[0], "commit id")?;
363 let parent_ids = git_text(fields[1], "parent commit ids")?
364 .split_whitespace()
365 .map(|id| GitCommitId::new(id.to_string()))
366 .collect::<Vec<_>>();
367 let author = owner(
368 git_text(fields[2], "author name")?,
369 git_text(fields[3], "author email")?,
370 "author",
371 )?;
372 let authored_at = git_timestamp(fields[4], "authored timestamp")?;
373 let committer = owner(
374 git_text(fields[5], "committer name")?,
375 git_text(fields[6], "committer email")?,
376 "committer",
377 )?;
378 let committed_at = git_timestamp(fields[7], "committed timestamp")?;
379 let mut summary = git_text(fields[8], "commit summary")?;
380 let message = git_text(fields[9], "commit message")?
381 .trim_end_matches(['\r', '\n'])
382 .to_string();
383 if summary.trim().is_empty() {
384 summary = message.lines().next().unwrap_or_default().to_string();
385 }
386 let commit_id = GitCommitId::new(sha);
387 let mut touches = parse_file_touches(fields[10], &commit_id, committed_at)?;
388 let file_count = touches.len();
389 history.commits.push(GitCommitRecord {
390 id: commit_id,
391 parent_ids,
392 author,
393 committer: Some(committer),
394 authored_at,
395 committed_at,
396 summary,
397 message,
398 file_count,
399 });
400 history.file_touches.append(&mut touches);
401 }
402 Ok(history)
403}
404
405fn parse_commit_patches(raw: &[u8]) -> Result<Vec<CommitPatch>> {
406 let mut commits = Vec::new();
407 let starts = patch_record_starts(raw);
408 if starts.is_empty() && !raw.is_empty() {
409 return Err(OkError::Repository(
410 "git patch output is missing a commit record".into(),
411 ));
412 }
413 for (index, start) in starts.iter().enumerate() {
414 let end = starts.get(index + 1).copied().unwrap_or(raw.len());
415 let record = &raw[start + 1..end];
416 let Some(metadata_end) = record.iter().position(|byte| *byte == 0) else {
417 return Err(OkError::Repository(
418 "git patch record is missing its commit delimiter".into(),
419 ));
420 };
421 let commit_id = GitCommitId::new(git_text(&record[..metadata_end], "commit id")?);
422 let patch = git_text(&record[metadata_end + 1..], "patch")?;
423 commits.push(CommitPatch {
424 commit_id,
425 files: parse_file_patches(&patch)?,
426 });
427 }
428 Ok(commits)
429}
430
431fn patch_record_starts(raw: &[u8]) -> Vec<usize> {
432 raw.iter()
433 .enumerate()
434 .filter_map(|(index, byte)| {
435 if *byte != COMMIT_RECORD_SEPARATOR {
436 return None;
437 }
438 let commit_start = index + 1;
439 [40, 64].into_iter().find_map(|length| {
440 let commit_end = commit_start + length;
441 (raw.get(commit_end) == Some(&0)
442 && raw
443 .get(commit_start..commit_end)
444 .is_some_and(|commit| commit.iter().all(u8::is_ascii_hexdigit)))
445 .then_some(index)
446 })
447 })
448 .collect()
449}
450
451fn parse_file_patches(patch: &str) -> Result<Vec<FilePatch>> {
452 #[derive(Default)]
453 struct PendingPatch {
454 path: Option<PathBuf>,
455 previous_path: Option<PathBuf>,
456 line_ranges: Vec<LineRange>,
457 }
458
459 fn finish(patches: &mut Vec<FilePatch>, pending: &mut PendingPatch) {
460 if let Some(path) = pending.path.take() {
461 patches.push(FilePatch {
462 path,
463 previous_path: pending.previous_path.take(),
464 line_ranges: std::mem::take(&mut pending.line_ranges),
465 });
466 } else {
467 pending.previous_path = None;
468 pending.line_ranges.clear();
469 }
470 }
471
472 let mut patches = Vec::new();
473 let mut pending = PendingPatch::default();
474 for line in patch.lines() {
475 if line.starts_with("diff --git ") {
476 finish(&mut patches, &mut pending);
477 } else if let Some(value) = line.strip_prefix("rename from ") {
478 pending.previous_path = Some(parse_patch_path(value, None)?);
479 } else if let Some(value) = line.strip_prefix("rename to ") {
480 pending.path = Some(parse_patch_path(value, None)?);
481 } else if let Some(value) = line.strip_prefix("+++ ") {
482 if value != "/dev/null" {
483 pending.path = Some(parse_patch_path(value, Some("b/"))?);
484 }
485 } else if line.starts_with("@@ ") {
486 if let Some(range) = parse_new_hunk_range(line)? {
487 pending.line_ranges.push(range);
488 }
489 }
490 }
491 finish(&mut patches, &mut pending);
492 Ok(patches)
493}
494
495fn parse_diff_name_status(raw: &str) -> Result<Vec<DiffFile>> {
496 raw.lines()
497 .filter(|line| !line.trim().is_empty())
498 .map(|line| {
499 let mut fields = line.split('\t').collect::<Vec<_>>();
500 if fields.len() < 2 {
501 fields = line.split_whitespace().collect();
502 }
503 let status = fields.first().copied().unwrap_or_default();
504 if fields.len() < 2 {
505 return Err(OkError::Repository(format!(
506 "git diff name-status entry is missing a path: `{line}`"
507 )));
508 }
509 let kind = change_kind(status.as_bytes());
510 let rename_score = status
511 .strip_prefix('R')
512 .or_else(|| status.strip_prefix('C'))
513 .and_then(|score| score.parse::<u8>().ok());
514 match kind {
515 GitChangeKind::Renamed | GitChangeKind::Copied => {
516 if fields.len() < 3 {
517 return Err(OkError::Repository(format!(
518 "git diff name-status rename is missing paths: `{line}`"
519 )));
520 }
521 Ok(DiffFile {
522 old_path: Some(parse_patch_path(fields[1], None)?),
523 new_path: Some(parse_patch_path(fields[2], None)?),
524 status: kind,
525 rename_score,
526 hunks: Vec::new(),
527 })
528 }
529 GitChangeKind::Deleted => Ok(DiffFile {
530 old_path: Some(parse_patch_path(fields[1], None)?),
531 new_path: None,
532 status: kind,
533 rename_score,
534 hunks: Vec::new(),
535 }),
536 _ => Ok(DiffFile {
537 old_path: None,
538 new_path: Some(parse_patch_path(fields[1], None)?),
539 status: kind,
540 rename_score,
541 hunks: Vec::new(),
542 }),
543 }
544 })
545 .collect()
546}
547
548fn parse_unified_zero_diff(patch: &str) -> Result<Vec<DiffFile>> {
549 #[derive(Default)]
550 struct PendingDiff {
551 old_path: Option<PathBuf>,
552 new_path: Option<PathBuf>,
553 status: Option<GitChangeKind>,
554 rename_score: Option<u8>,
555 hunks: Vec<DiffHunk>,
556 }
557
558 fn finish(files: &mut Vec<DiffFile>, pending: &mut PendingDiff) {
559 if pending.old_path.is_none() && pending.new_path.is_none() {
560 pending.hunks.clear();
561 pending.status = None;
562 pending.rename_score = None;
563 return;
564 }
565 let status = pending.status.unwrap_or_else(|| {
566 if pending.old_path.is_none() {
567 GitChangeKind::Added
568 } else if pending.new_path.is_none() {
569 GitChangeKind::Deleted
570 } else if pending.old_path != pending.new_path {
571 GitChangeKind::Renamed
572 } else {
573 GitChangeKind::Modified
574 }
575 });
576 files.push(DiffFile {
577 old_path: pending.old_path.take(),
578 new_path: pending.new_path.take(),
579 status,
580 rename_score: pending.rename_score.take(),
581 hunks: std::mem::take(&mut pending.hunks),
582 });
583 }
584
585 let mut files = Vec::new();
586 let mut pending = PendingDiff::default();
587 for line in patch.lines() {
588 if line.starts_with("diff --git ") {
589 finish(&mut files, &mut pending);
590 } else if line.starts_with("new file mode ") {
591 pending.status = Some(GitChangeKind::Added);
592 } else if line.starts_with("deleted file mode ") {
593 pending.status = Some(GitChangeKind::Deleted);
594 } else if let Some(score) = line.strip_prefix("similarity index ") {
595 pending.rename_score = score.trim_end_matches('%').parse::<u8>().ok();
596 } else if let Some(value) = line.strip_prefix("rename from ") {
597 pending.old_path = Some(parse_patch_path(value, None)?);
598 pending.status = Some(GitChangeKind::Renamed);
599 } else if let Some(value) = line.strip_prefix("rename to ") {
600 pending.new_path = Some(parse_patch_path(value, None)?);
601 pending.status = Some(GitChangeKind::Renamed);
602 } else if let Some(value) = line.strip_prefix("--- ") {
603 if value != "/dev/null" {
604 pending.old_path = Some(parse_patch_path(value, Some("a/"))?);
605 }
606 } else if let Some(value) = line.strip_prefix("+++ ") {
607 if value != "/dev/null" {
608 pending.new_path = Some(parse_patch_path(value, Some("b/"))?);
609 }
610 } else if line.starts_with("@@ ") {
611 pending.hunks.push(parse_diff_hunk(line)?);
612 }
613 }
614 finish(&mut files, &mut pending);
615 Ok(files)
616}
617
618fn parse_diff_hunk(header: &str) -> Result<DiffHunk> {
619 let old = header
620 .split_whitespace()
621 .find(|part| part.starts_with('-'))
622 .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
623 let new = header
624 .split_whitespace()
625 .find(|part| part.starts_with('+'))
626 .ok_or_else(|| OkError::Repository(format!("git diff hunk is malformed: `{header}`")))?;
627 Ok(DiffHunk {
628 old_range: parse_hunk_range(old.trim_start_matches('-'))?,
629 new_range: parse_hunk_range(new.trim_start_matches('+'))?,
630 })
631}
632
633fn parse_hunk_range(value: &str) -> Result<Option<LineRange>> {
634 let (start, count) = value.split_once(',').unwrap_or((value, "1"));
635 let start = start.parse::<u32>().map_err(|err| {
636 OkError::Repository(format!("git diff hunk start `{start}` is invalid: {err}"))
637 })?;
638 let count = count.parse::<u32>().map_err(|err| {
639 OkError::Repository(format!("git diff hunk count `{count}` is invalid: {err}"))
640 })?;
641 if count == 0 {
642 return Ok(None);
643 }
644 Ok(Some(LineRange {
645 start,
646 end: start.saturating_add(count - 1),
647 }))
648}
649
650fn parse_new_hunk_range(header: &str) -> Result<Option<LineRange>> {
651 let marker = header
652 .split_whitespace()
653 .find(|part| part.starts_with('+'))
654 .ok_or_else(|| OkError::Repository(format!("git patch hunk is malformed: `{header}`")))?;
655 let value = marker.trim_start_matches('+');
656 let (start, count) = value.split_once(',').unwrap_or((value, "1"));
657 let start = start.parse::<u32>().map_err(|err| {
658 OkError::Repository(format!("git patch hunk start `{start}` is invalid: {err}"))
659 })?;
660 let count = count.parse::<u32>().map_err(|err| {
661 OkError::Repository(format!("git patch hunk count `{count}` is invalid: {err}"))
662 })?;
663 if count == 0 {
664 return Ok(None);
665 }
666 Ok(Some(LineRange {
667 start,
668 end: start.saturating_add(count - 1),
669 }))
670}
671
672fn parse_patch_path(value: &str, prefix: Option<&str>) -> Result<PathBuf> {
673 let decoded = if value.starts_with('"') {
674 decode_git_quoted_path(value)?
675 } else {
676 value.to_string()
677 };
678 let decoded = prefix
679 .and_then(|prefix| decoded.strip_prefix(prefix))
680 .unwrap_or(&decoded);
681 Ok(PathBuf::from(decoded))
682}
683
684fn decode_git_quoted_path(value: &str) -> Result<String> {
685 let Some(inner) = value
686 .strip_prefix('"')
687 .and_then(|value| value.strip_suffix('"'))
688 else {
689 return Err(OkError::Repository(format!(
690 "git patch path has invalid quoting: `{value}`"
691 )));
692 };
693 let mut bytes = Vec::with_capacity(inner.len());
694 let mut chars = inner.as_bytes().iter().copied().peekable();
695 while let Some(byte) = chars.next() {
696 if byte != b'\\' {
697 bytes.push(byte);
698 continue;
699 }
700 let escaped = chars.next().ok_or_else(|| {
701 OkError::Repository(format!("git patch path has a trailing escape: `{value}`"))
702 })?;
703 match escaped {
704 b'\\' | b'"' => bytes.push(escaped),
705 b'a' => bytes.push(0x07),
706 b'b' => bytes.push(0x08),
707 b't' => bytes.push(b'\t'),
708 b'n' => bytes.push(b'\n'),
709 b'v' => bytes.push(0x0b),
710 b'f' => bytes.push(0x0c),
711 b'r' => bytes.push(b'\r'),
712 b'0'..=b'7' => {
713 let mut octal = vec![escaped];
714 for _ in 0..2 {
715 if chars.peek().is_some_and(|byte| matches!(byte, b'0'..=b'7')) {
716 octal.push(chars.next().expect("peeked octal byte"));
717 } else {
718 break;
719 }
720 }
721 let decoded = std::str::from_utf8(&octal)
722 .ok()
723 .and_then(|value| u8::from_str_radix(value, 8).ok())
724 .ok_or_else(|| {
725 OkError::Repository("git patch path contains invalid octal escape".into())
726 })?;
727 bytes.push(decoded);
728 }
729 other => bytes.push(other),
730 }
731 }
732 String::from_utf8(bytes)
733 .map_err(|err| OkError::Repository(format!("git patch path is not UTF-8: {err}")))
734}
735
736fn parse_file_touches(
737 raw: &[u8],
738 commit_id: &GitCommitId,
739 touched_at: DateTime<Utc>,
740) -> Result<Vec<GitFileTouch>> {
741 let mut tokens = raw.split(|byte| *byte == 0);
742 let mut touches = Vec::new();
743 while let Some(status) = next_status(&mut tokens) {
744 let change_kind = change_kind(status);
745 let rename_or_copy = matches!(change_kind, GitChangeKind::Renamed | GitChangeKind::Copied);
746 let first_path = next_path(&mut tokens, commit_id, status)?;
747 let (path, previous_path) = if rename_or_copy {
748 let current_path = next_path(&mut tokens, commit_id, status)?;
749 (current_path, Some(first_path))
750 } else {
751 (first_path, None)
752 };
753 let id = HistoryRecordId::new(format!("file-touch:{}:{}", commit_id.0, touches.len()));
754 touches.push(GitFileTouch {
755 id,
756 commit_id: commit_id.clone(),
757 path,
758 previous_path,
759 change_kind,
760 additions: None,
761 deletions: None,
762 touched_at,
763 });
764 }
765 Ok(touches)
766}
767
768fn next_status<'a>(tokens: &mut impl Iterator<Item = &'a [u8]>) -> Option<&'a [u8]> {
769 tokens
770 .map(trim_status_prefix)
771 .find(|token| !token.is_empty())
772}
773
774fn next_path<'a>(
775 tokens: &mut impl Iterator<Item = &'a [u8]>,
776 commit_id: &GitCommitId,
777 status: &[u8],
778) -> Result<PathBuf> {
779 let path = tokens.find(|token| !token.is_empty()).ok_or_else(|| {
780 OkError::Repository(format!(
781 "git history record for commit `{commit_id}` is missing a path after status `{}`",
782 String::from_utf8_lossy(status)
783 ))
784 })?;
785 Ok(PathBuf::from(git_text(path, "changed path")?))
786}
787
/// Drops every leading carriage return and line feed from `value`.
///
/// Bytes after the first other byte are left untouched, including later line breaks.
fn trim_status_prefix(value: &[u8]) -> &[u8] {
    let first_kept = value
        .iter()
        .position(|byte| !matches!(byte, b'\r' | b'\n'))
        .unwrap_or(value.len());
    &value[first_kept..]
}
797
798fn change_kind(status: &[u8]) -> GitChangeKind {
799 match status.first().copied() {
800 Some(b'A') => GitChangeKind::Added,
801 Some(b'M') => GitChangeKind::Modified,
802 Some(b'D') => GitChangeKind::Deleted,
803 Some(b'R') => GitChangeKind::Renamed,
804 Some(b'C') => GitChangeKind::Copied,
805 Some(b'T') => GitChangeKind::TypeChanged,
806 _ => GitChangeKind::Unknown,
807 }
808}
809
810fn owner(name: String, email: String, role: &str) -> Result<Owner> {
811 let name = name.trim().to_string();
812 let email = email.trim().to_string();
813 let name = if name.is_empty() { email.clone() } else { name };
814 if name.is_empty() {
815 return Err(OkError::Repository(format!(
816 "git history {role} identity is empty"
817 )));
818 }
819 Ok(Owner {
820 name,
821 email: (!email.is_empty()).then_some(email),
822 })
823}
824
825fn git_timestamp(raw: &[u8], field: &str) -> Result<DateTime<Utc>> {
826 let value = git_text(raw, field)?;
827 DateTime::parse_from_rfc3339(&value)
828 .map(|timestamp| timestamp.with_timezone(&Utc))
829 .map_err(|err| {
830 OkError::Repository(format!("git history {field} `{value}` is invalid: {err}"))
831 })
832}
833
834fn git_text(raw: &[u8], field: &str) -> Result<String> {
835 String::from_utf8(raw.to_vec()).map_err(|err| {
836 OkError::Repository(format!("git history {field} is not valid UTF-8: {err}"))
837 })
838}
839
/// Heuristically decides whether `path` belongs to a test suite.
///
/// The comparison is ASCII case-insensitive. A path counts as a test path when
///
/// * any of its directories is named `test` or `tests`, including the very first one
///   (git reports repo-relative paths such as `tests/api.rs`, which have no leading slash), or
/// * its file name ends in one of the known test suffixes (`_test.rs`, `_test.go`,
///   `.test.ts`, `.spec.ts`, `test.java`, `tests.java`).
fn is_test_path(path: &Path) -> bool {
    let value = path.to_string_lossy().to_ascii_lowercase();

    // Every `/`-separated component except the last one is a directory.
    let mut directories = value.split('/');
    directories.next_back();
    let in_test_directory = directories.any(|dir| dir == "test" || dir == "tests");

    in_test_directory
        || value.ends_with("_test.rs")
        || value.ends_with("_test.go")
        || value.ends_with(".test.ts")
        || value.ends_with(".spec.ts")
        || value.ends_with("test.java")
        || value.ends_with("tests.java")
}
851
852#[cfg(test)]
853mod tests {
854 use super::{
855 cochange_records, commit_history, commit_patches, parse_commit_patches,
856 parse_diff_name_status, parse_file_patches, parse_unified_zero_diff,
857 };
858 use open_kioku_core::GitChangeKind;
859 use std::fs;
860 use std::path::Path;
861 use std::process::Command;
862
863 #[test]
864 fn cochange_records_apply_recency_and_test_corun() {
865 let dir = tempfile::tempdir().unwrap();
866 run(dir.path(), &["init"]);
867 run(dir.path(), &["config", "user.email", "test@example.com"]);
868 run(dir.path(), &["config", "user.name", "Test User"]);
869
870 write(dir.path(), "src/old.rs", "fn old() {}\n");
871 write(
872 dir.path(),
873 "tests/old_test.rs",
874 "#[test] fn old_test() {}\n",
875 );
876 run(dir.path(), &["add", "."]);
877 run(dir.path(), &["commit", "-m", "old pair"]);
878
879 write(dir.path(), "src/new.rs", "fn new() {}\n");
880 write(
881 dir.path(),
882 "tests/new_test.rs",
883 "#[test] fn new_test() {}\n",
884 );
885 run(dir.path(), &["add", "."]);
886 run(dir.path(), &["commit", "-m", "new pair"]);
887
888 let records = cochange_records(dir.path(), 20, 10).unwrap();
889 let new_pair = records
890 .iter()
891 .find(|record| {
892 record.path == std::path::Path::new("src/new.rs")
893 && record.cochanged_path == std::path::Path::new("tests/new_test.rs")
894 })
895 .unwrap();
896 let old_pair = records
897 .iter()
898 .find(|record| {
899 record.path == std::path::Path::new("src/old.rs")
900 && record.cochanged_path == std::path::Path::new("tests/old_test.rs")
901 })
902 .unwrap();
903
904 assert!(new_pair.test_corun);
905 assert!(new_pair.recency_weight > old_pair.recency_weight);
906 assert_eq!(new_pair.commit_count, 1);
907 }
908
909 #[test]
910 fn commit_history_respects_window_and_keeps_every_file_touch() {
911 let dir = initialized_repo();
912 write(dir.path(), "src/old.rs", "fn old() {}\n");
913 commit_all(dir.path(), "old");
914 write(dir.path(), "src/a.rs", "fn a() {}\n");
915 write(dir.path(), "src/b.rs", "fn b() {}\n");
916 write(dir.path(), "tests/a_test.rs", "#[test] fn a() {}\n");
917 commit_all(dir.path(), "multi-file change");
918
919 let history = commit_history(dir.path(), 1).unwrap();
920
921 assert_eq!(history.commits.len(), 1);
922 assert_eq!(history.commits[0].summary, "multi-file change");
923 assert_eq!(history.commits[0].author.name, "Test User");
924 assert_eq!(
925 history.commits[0].author.email.as_deref(),
926 Some("test@example.com")
927 );
928 assert_eq!(history.commits[0].file_count, 3);
929 assert_eq!(history.file_touches.len(), 3);
930 assert!(history
931 .file_touches
932 .iter()
933 .all(|touch| touch.commit_id == history.commits[0].id));
934 }
935
936 #[test]
937 fn commit_history_captures_renames() {
938 let dir = initialized_repo();
939 write(dir.path(), "src/old.rs", "fn renamed() {}\n");
940 commit_all(dir.path(), "add old path");
941 run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
942 commit_all(dir.path(), "rename path");
943
944 let history = commit_history(dir.path(), 1).unwrap();
945 let touch = history.file_touches.first().unwrap();
946
947 assert_eq!(touch.change_kind, GitChangeKind::Renamed);
948 assert_eq!(
949 touch.previous_path.as_deref(),
950 Some(Path::new("src/old.rs"))
951 );
952 assert_eq!(touch.path, Path::new("src/new.rs"));
953 }
954
955 #[test]
956 fn commit_history_handles_empty_and_shallow_repositories() {
957 let empty = initialized_repo();
958 assert_eq!(
959 commit_history(empty.path(), 10).unwrap(),
960 super::CommitHistory::empty()
961 );
962
963 let origin = initialized_repo();
964 write(origin.path(), "src/one.rs", "fn one() {}\n");
965 commit_all(origin.path(), "one");
966 write(origin.path(), "src/two.rs", "fn two() {}\n");
967 commit_all(origin.path(), "two");
968
969 let clone_parent = tempfile::tempdir().unwrap();
970 let shallow = clone_parent.path().join("shallow");
971 let source = format!("file://{}", origin.path().canonicalize().unwrap().display());
972 let status = Command::new("git")
973 .args(["clone", "--quiet", "--depth", "1"])
974 .arg(source)
975 .arg(&shallow)
976 .status()
977 .unwrap();
978 assert!(status.success());
979
980 let history = commit_history(&shallow, 10).unwrap();
981 assert_eq!(history.commits.len(), 1);
982 assert_eq!(history.commits[0].summary, "two");
983 }
984
985 #[test]
986 fn commit_patches_capture_zero_context_line_ranges_and_renames() {
987 let dir = initialized_repo();
988 write(
989 dir.path(),
990 "src/old.rs",
991 "fn alpha() {\n one();\n}\n\nfn beta() {\n two();\n}\n",
992 );
993 commit_all(dir.path(), "add symbols");
994 run(dir.path(), &["mv", "src/old.rs", "src/new.rs"]);
995 write(
996 dir.path(),
997 "src/new.rs",
998 "fn alpha() {\n changed();\n}\n\nfn beta() {\n two();\n added();\n}\n",
999 );
1000 commit_all(dir.path(), "rename and modify");
1001
1002 let patches = commit_patches(dir.path(), 1).unwrap();
1003
1004 assert_eq!(patches.len(), 1);
1005 assert_eq!(patches[0].files.len(), 1);
1006 let file = &patches[0].files[0];
1007 assert_eq!(file.path, Path::new("src/new.rs"));
1008 assert_eq!(file.previous_path.as_deref(), Some(Path::new("src/old.rs")));
1009 assert_eq!(
1010 file.line_ranges,
1011 vec![
1012 open_kioku_core::LineRange { start: 2, end: 2 },
1013 open_kioku_core::LineRange { start: 7, end: 7 }
1014 ]
1015 );
1016 }
1017
#[test]
fn diff_name_status_parser_captures_added_modified_deleted_and_renamed() {
    // One tab-separated record per change kind; the rename record carries a
    // similarity score after the `R` and two paths.
    let listing = concat!(
        "A\tsrc/new.rs\n",
        "M\tsrc/lib.rs\n",
        "D\tsrc/old.rs\n",
        "R087\tsrc/before.rs\tsrc/after.rs\n",
    );

    let files = parse_diff_name_status(listing).unwrap();
    assert_eq!(files.len(), 4);

    let added = &files[0];
    assert_eq!(added.status, GitChangeKind::Added);
    assert_eq!(added.new_path.as_deref(), Some(Path::new("src/new.rs")));

    let modified = &files[1];
    assert_eq!(modified.status, GitChangeKind::Modified);

    let deleted = &files[2];
    assert_eq!(deleted.status, GitChangeKind::Deleted);
    assert_eq!(deleted.old_path.as_deref(), Some(Path::new("src/old.rs")));

    let renamed = &files[3];
    assert_eq!(renamed.status, GitChangeKind::Renamed);
    assert_eq!(renamed.rename_score, Some(87));
    assert_eq!(renamed.old_path.as_deref(), Some(Path::new("src/before.rs")));
    assert_eq!(renamed.new_path.as_deref(), Some(Path::new("src/after.rs")));
}
1045
#[test]
fn unified_zero_diff_parser_captures_old_new_hunks_and_changed_ranges() {
    // Two file sections: a rename with one replacement hunk and one pure
    // insertion hunk, followed by a whole-file deletion.
    let diff = concat!(
        "diff --git a/src/old.rs b/src/new.rs\n",
        "similarity index 92%\n",
        "rename from src/old.rs\n",
        "rename to src/new.rs\n",
        "--- a/src/old.rs\n",
        "+++ b/src/new.rs\n",
        "@@ -2 +2 @@\n",
        "-old();\n",
        "+new();\n",
        "@@ -8,0 +9,2 @@\n",
        "+added();\n",
        "+again();\n",
        "diff --git a/src/deleted.rs b/src/deleted.rs\n",
        "deleted file mode 100644\n",
        "--- a/src/deleted.rs\n",
        "+++ /dev/null\n",
        "@@ -1,3 +0,0 @@\n",
    );

    let files = parse_unified_zero_diff(diff).unwrap();
    assert_eq!(files.len(), 2);

    // Renamed file: score comes from the `similarity index` header.
    let renamed = &files[0];
    assert_eq!(renamed.status, GitChangeKind::Renamed);
    assert_eq!(renamed.rename_score, Some(92));
    assert_eq!(renamed.old_path.as_deref(), Some(Path::new("src/old.rs")));
    assert_eq!(renamed.new_path.as_deref(), Some(Path::new("src/new.rs")));

    // A zero-length side of a hunk (`-8,0`) is reported as `None`.
    let expected_hunks = vec![
        super::DiffHunk {
            old_range: Some(open_kioku_core::LineRange { start: 2, end: 2 }),
            new_range: Some(open_kioku_core::LineRange { start: 2, end: 2 }),
        },
        super::DiffHunk {
            old_range: None,
            new_range: Some(open_kioku_core::LineRange { start: 9, end: 10 }),
        },
    ];
    assert_eq!(renamed.hunks, expected_hunks);

    let expected_changed = vec![
        open_kioku_core::LineRange { start: 2, end: 2 },
        open_kioku_core::LineRange { start: 9, end: 10 },
    ];
    assert_eq!(renamed.changed_line_ranges(), expected_changed);

    // Deleted file: only the old side of its hunk has a range.
    let deleted = &files[1];
    assert_eq!(deleted.status, GitChangeKind::Deleted);
    let removal = &deleted.hunks[0];
    assert_eq!(
        removal.old_range,
        Some(open_kioku_core::LineRange { start: 1, end: 3 })
    );
    assert_eq!(removal.new_range, None);
}
1101
#[test]
fn patch_parser_decodes_quoted_paths_and_ignores_deletion_ranges() {
    // The path is quoted with an octal-escaped space (`\040`). The first hunk
    // has an empty new side (`+3,0`); only the second hunk is expected to
    // produce a range.
    let diff = concat!(
        "diff --git \"a/src/space\\040name.rs\" \"b/src/space\\040name.rs\"\n",
        "--- \"a/src/space\\040name.rs\"\n",
        "+++ \"b/src/space\\040name.rs\"\n",
        "@@ -3,2 +3,0 @@\n",
        "@@ -8 +6,2 @@\n",
    );

    let patches = parse_file_patches(diff).unwrap();
    assert_eq!(patches.len(), 1);

    let patch = &patches[0];
    assert_eq!(patch.path, Path::new("src/space name.rs"));

    let expected_ranges = vec![open_kioku_core::LineRange { start: 6, end: 7 }];
    assert_eq!(patch.line_ranges, expected_ranges);
}
1120
#[test]
fn patch_parser_ignores_record_separator_bytes_inside_diff_content() {
    // Assemble the raw stream piece by piece: a 0x1e byte, a 40-character
    // commit id, a NUL, then a diff whose added line itself contains 0x1e.
    let mut raw: Vec<u8> = Vec::new();
    raw.extend_from_slice(b"\x1e0123456789abcdef0123456789abcdef01234567\x00");
    raw.extend_from_slice(b"diff --git a/a.rs b/a.rs\n");
    raw.extend_from_slice(b"+++ b/a.rs\n");
    raw.extend_from_slice(b"@@ -0,0 +1 @@\n");
    raw.extend_from_slice(b"+embedded ");
    raw.push(0x1e);
    raw.extend_from_slice(b" byte\n");

    let patches = parse_commit_patches(&raw).unwrap();

    // The embedded 0x1e must not start a second commit record.
    assert_eq!(patches.len(), 1);
    let files = &patches[0].files;
    assert_eq!(files.len(), 1);
    assert_eq!(files[0].path, Path::new("a.rs"));
}
1137
1138 fn initialized_repo() -> tempfile::TempDir {
1139 let dir = tempfile::tempdir().unwrap();
1140 run(dir.path(), &["init", "--quiet"]);
1141 run(dir.path(), &["config", "user.email", "test@example.com"]);
1142 run(dir.path(), &["config", "user.name", "Test User"]);
1143 run(dir.path(), &["config", "commit.gpgsign", "false"]);
1144 dir
1145 }
1146
1147 fn commit_all(root: &Path, message: &str) {
1148 run(root, &["add", "."]);
1149 run(root, &["commit", "--quiet", "-m", message]);
1150 }
1151
/// Writes `content` to `root/path`, creating any missing parent directories
/// first. Panics if the joined path has no parent or on any I/O failure.
fn write(root: &Path, path: &str, content: &str) {
    let target = root.join(path);
    let parent = target.parent().unwrap();

    fs::create_dir_all(parent).unwrap();
    fs::write(&target, content).unwrap();
}
1157
/// Runs `git -C <root> <args...>` and waits for it to finish.
///
/// Panics if the process cannot be spawned or exits unsuccessfully.
fn run(root: &Path, args: &[&str]) {
    let mut git = Command::new("git");
    git.arg("-C").arg(root).args(args);

    let outcome = git.status().unwrap();
    assert!(outcome.success(), "git {args:?} failed");
}
1167}