Skip to main content

tracevault_core/
gitai.rs

1use crate::attribution::{Attribution, FileAttribution, LineRange};
2use crate::diff::{DiffLineKind, FileDiff};
3
4/// A parsed git-ai authorship log.
5#[derive(Debug, Clone)]
6pub struct GitAiAuthorshipLog {
7    pub files: Vec<GitAiFileEntry>,
8    pub metadata: Option<serde_json::Value>,
9}
10
/// One file listed in the attestation section of a git-ai note.
#[derive(Clone, Debug)]
pub struct GitAiFileEntry {
    /// File path exactly as written on the (non-indented) path line.
    pub path: String,
    /// Line ranges attributed to AI for this file.
    ///
    /// Each tuple is `(start, end)`, inclusive on both ends, using 1-indexed
    /// line numbers. A single line `n` is stored as `(n, n)`.
    pub ai_line_ranges: Vec<(u32, u32)>,
}
18
19/// Parse a git-ai note (from `git notes --ref refs/notes/ai show <sha>`).
20///
21/// Format (v3.0.0):
22/// ```text
23/// path/to/file.rs
24///    session_id 94,102,107,120,122-123,128
25/// another/file.rs
26///    session_id 1-5,10
27/// ---
28/// {"schema_version":"authorship/3.0.0",...}
29/// ```
30/// Line specs are comma-separated numbers and ranges (no +/- prefix).
31pub fn parse_gitai_note(note: &str) -> Option<GitAiAuthorshipLog> {
32    let note = note.trim();
33    if note.is_empty() {
34        return None;
35    }
36
37    let separator_pos = note.find("\n---\n").or_else(|| note.find("\n---"))?;
38    let attestation = &note[..separator_pos];
39    let metadata_str = note[separator_pos..].trim_start_matches('\n').strip_prefix("---")?;
40    let metadata_str = metadata_str.trim();
41
42    let metadata: Option<serde_json::Value> = if metadata_str.is_empty() {
43        None
44    } else {
45        serde_json::from_str(metadata_str).ok()
46    };
47
48    let files = parse_attestation(attestation);
49    if files.is_empty() {
50        return None;
51    }
52
53    Some(GitAiAuthorshipLog { files, metadata })
54}
55
56fn parse_attestation(text: &str) -> Vec<GitAiFileEntry> {
57    let mut files: Vec<GitAiFileEntry> = Vec::new();
58    let mut current_path: Option<String> = None;
59    let mut current_ranges: Vec<(u32, u32)> = Vec::new();
60
61    for line in text.lines() {
62        if line.is_empty() {
63            continue;
64        }
65
66        // Indented lines are session entries (start with whitespace)
67        if line.starts_with(' ') || line.starts_with('\t') {
68            // Format: "  session_id 94,102,107,120,122-123,128"
69            let tokens: Vec<&str> = line.split_whitespace().collect();
70            // tokens[0] = session_id, tokens[1] = comma-separated line specs
71            if tokens.len() >= 2 {
72                for spec in tokens[1].split(',') {
73                    if let Some((start, end)) = parse_line_range(spec) {
74                        current_ranges.push((start, end));
75                    }
76                }
77            }
78        } else {
79            // Non-indented line = file path; flush previous file
80            if let Some(path) = current_path.take() {
81                files.push(GitAiFileEntry {
82                    path,
83                    ai_line_ranges: std::mem::take(&mut current_ranges),
84                });
85            }
86            current_path = Some(line.to_string());
87        }
88    }
89
90    // Flush last file
91    if let Some(path) = current_path {
92        files.push(GitAiFileEntry {
93            path,
94            ai_line_ranges: current_ranges,
95        });
96    }
97
98    files
99}
100
/// Parse a single line spec: `"1-10"` becomes `(1, 10)` and `"20"` becomes
/// `(20, 20)`.
///
/// Returns `None` when either number fails to parse as `u32` (including an
/// empty string) or when the range is inverted (`start > end`, e.g.
/// `"10-5"`), so every returned tuple is a valid inclusive range.
fn parse_line_range(s: &str) -> Option<(u32, u32)> {
    match s.split_once('-') {
        Some((start_str, end_str)) => {
            let start: u32 = start_str.parse().ok()?;
            let end: u32 = end_str.parse().ok()?;
            // An inverted range is malformed input; reject it like any other
            // unparsable spec instead of handing callers a tuple that breaks
            // the `start <= end` invariant.
            if start <= end {
                Some((start, end))
            } else {
                None
            }
        }
        None => {
            let n: u32 = s.parse().ok()?;
            Some((n, n))
        }
    }
}
112
113/// Convert a git-ai authorship log to tracevault's Attribution format.
114/// `diff_files` provides the full diff so we can identify human-written lines
115/// (lines added in the diff but not listed in the git-ai note).
116pub fn gitai_to_attribution(log: &GitAiAuthorshipLog, diff_files: &[FileDiff]) -> Attribution {
117    use std::collections::{HashMap, HashSet};
118
119    // Build a lookup: file path -> set of AI-authored new line numbers
120    let ai_lines_by_file: HashMap<&str, HashSet<u32>> = log
121        .files
122        .iter()
123        .map(|entry| {
124            let mut lines = HashSet::new();
125            for &(start, end) in &entry.ai_line_ranges {
126                for n in start..=end {
127                    lines.insert(n);
128                }
129            }
130            (entry.path.as_str(), lines)
131        })
132        .collect();
133
134    let mut files: Vec<FileAttribution> = Vec::new();
135
136    for diff_file in diff_files {
137        // Collect all added line numbers and deleted count from the diff
138        let mut added_lines: Vec<u32> = Vec::new();
139        let mut deleted_count: u32 = 0;
140
141        for hunk in &diff_file.hunks {
142            for line in &hunk.lines {
143                match line.kind {
144                    DiffLineKind::Add => {
145                        if let Some(n) = line.new_line_number {
146                            added_lines.push(n);
147                        }
148                    }
149                    DiffLineKind::Delete => {
150                        deleted_count += 1;
151                    }
152                    DiffLineKind::Context => {}
153                }
154            }
155        }
156
157        let ai_set = ai_lines_by_file.get(diff_file.path.as_str());
158
159        // Partition added lines into AI vs human
160        let mut ai_line_nums: Vec<u32> = Vec::new();
161        let mut human_line_nums: Vec<u32> = Vec::new();
162
163        for n in &added_lines {
164            if ai_set.map_or(false, |s| s.contains(n)) {
165                ai_line_nums.push(*n);
166            } else {
167                human_line_nums.push(*n);
168            }
169        }
170
171        let ai_lines = collapse_to_ranges(&mut ai_line_nums);
172        let human_lines = collapse_to_ranges(&mut human_line_nums);
173
174        files.push(FileAttribution {
175            path: diff_file.path.clone(),
176            lines_added: added_lines.len() as u32,
177            lines_deleted: deleted_count,
178            ai_lines,
179            human_lines,
180            mixed_lines: vec![],
181        });
182    }
183
184    let summary = crate::attribution_engine::compute_attribution_summary(&files);
185
186    Attribution { files, summary }
187}
188
189/// Collapse a list of line numbers into contiguous `LineRange`s.
190fn collapse_to_ranges(nums: &mut Vec<u32>) -> Vec<LineRange> {
191    if nums.is_empty() {
192        return vec![];
193    }
194    nums.sort_unstable();
195
196    let mut ranges = Vec::new();
197    let mut start = nums[0];
198    let mut end = nums[0];
199
200    for &n in &nums[1..] {
201        if n == end + 1 {
202            end = n;
203        } else {
204            ranges.push(LineRange { start, end });
205            start = n;
206            end = n;
207        }
208    }
209    ranges.push(LineRange { start, end });
210    ranges
211}