// ai_agent/utils/commit_attribution.rs

//! Commit attribution utilities for tracking Claude's contributions.
//!
//! This module provides functionality to track and attribute file changes
//! to Claude or human contributors for git commit attribution.
5
6use crate::constants::env::ai;
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9use std::collections::{HashMap, HashSet};
10use std::path::{Path, PathBuf};
11use std::sync::RwLock;
12use std::time::{SystemTime, UNIX_EPOCH};
13
// ============================================================================
// Types
// ============================================================================
17
18/// Attribution state for tracking Claude's contributions to files.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct AttributionState {
21    /// File states keyed by relative path (from cwd)
22    pub file_states: HashMap<String, FileAttributionState>,
23    /// Session baseline states for net change calculation
24    pub session_baselines: HashMap<String, SessionBaseline>,
25    /// Surface from which edits were made
26    pub surface: String,
27    /// HEAD SHA at session start (for detecting external commits)
28    pub starting_head_sha: Option<String>,
29    /// Total prompts in session (for steer count calculation)
30    pub prompt_count: u32,
31    /// Prompts at last commit (to calculate steers for current commit)
32    pub prompt_count_at_last_commit: u32,
33    /// Permission prompt tracking
34    pub permission_prompt_count: u32,
35    pub permission_prompt_count_at_last_commit: u32,
36    /// ESC press tracking (user cancelled permission prompt)
37    pub escape_count: u32,
38    pub escape_count_at_last_commit: u32,
39}
40
41/// Per-file attribution state.
42#[derive(Debug, Clone, Serialize, Deserialize, Default)]
43pub struct FileAttributionState {
44    pub content_hash: String,
45    pub claude_contribution: u64,
46    pub mtime: u64,
47}
48
49/// Session baseline for tracking file state at session start.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct SessionBaseline {
52    pub content_hash: String,
53    pub mtime: u64,
54}
55
56/// Summary of Claude's contribution for a commit.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct AttributionSummary {
59    pub claude_percent: u32,
60    pub claude_chars: u64,
61    pub human_chars: u64,
62    pub surfaces: Vec<String>,
63}
64
65/// Per-file attribution details for git notes.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct FileAttribution {
68    pub claude_chars: u64,
69    pub human_chars: u64,
70    pub percent: u32,
71    pub surface: String,
72}
73
74/// Full attribution data for git notes JSON.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct AttributionData {
77    pub version: u32,
78    pub summary: AttributionSummary,
79    pub files: HashMap<String, FileAttribution>,
80    pub surface_breakdown: HashMap<String, SurfaceBreakdown>,
81    pub excluded_generated: Vec<String>,
82    pub sessions: Vec<String>,
83}
84
85/// Surface breakdown for attribution.
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct SurfaceBreakdown {
88    pub claude_chars: u64,
89    pub percent: u32,
90}
91
92/// Attribution snapshot message for persistence.
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct AttributionSnapshotMessage {
95    #[serde(rename = "type")]
96    pub message_type: String,
97    pub message_id: String,
98    pub surface: String,
99    pub file_states: HashMap<String, FileAttributionState>,
100    pub prompt_count: u32,
101    pub prompt_count_at_last_commit: u32,
102    pub permission_prompt_count: u32,
103    pub permission_prompt_count_at_last_commit: u32,
104    pub escape_count: u32,
105    pub escape_count_at_last_commit: u32,
106}
107
// ============================================================================
// Constants
// ============================================================================
111
/// Allowlist of repositories in which internal model names may appear in
/// commit trailers.
///
/// Every repository is listed twice: once in SSH form (`github.com:org/repo`)
/// and once in HTTPS form (`github.com/org/repo`).
const INTERNAL_MODEL_REPOS: &[&str] = &[
    // claude-cli-internal
    "github.com:anthropics/claude-cli-internal",
    "github.com/anthropics/claude-cli-internal",
    // anthropic
    "github.com:anthropics/anthropic",
    "github.com/anthropics/anthropic",
    // apps
    "github.com:anthropics/apps",
    "github.com/anthropics/apps",
    // casino
    "github.com:anthropics/casino",
    "github.com/anthropics/casino",
    // dbt
    "github.com:anthropics/dbt",
    "github.com/anthropics/dbt",
    // dotfiles
    "github.com:anthropics/dotfiles",
    "github.com/anthropics/dotfiles",
    // terraform-config
    "github.com:anthropics/terraform-config",
    "github.com/anthropics/terraform-config",
    // hex-export
    "github.com:anthropics/hex-export",
    "github.com/anthropics/hex-export",
    // feedback-v2
    "github.com:anthropics/feedback-v2",
    "github.com/anthropics/feedback-v2",
    // labs
    "github.com:anthropics/labs",
    "github.com/anthropics/labs",
    // argo-rollouts
    "github.com:anthropics/argo-rollouts",
    "github.com/anthropics/argo-rollouts",
    // starling-configs
    "github.com:anthropics/starling-configs",
    "github.com/anthropics/starling-configs",
    // ts-tools
    "github.com:anthropics/ts-tools",
    "github.com/anthropics/ts-tools",
    // ts-capsules
    "github.com:anthropics/ts-capsules",
    "github.com/anthropics/ts-capsules",
    // feldspar-testing
    "github.com:anthropics/feldspar-testing",
    "github.com/anthropics/feldspar-testing",
    // trellis
    "github.com:anthropics/trellis",
    "github.com/anthropics/trellis",
    // claude-for-hiring
    "github.com:anthropics/claude-for-hiring",
    "github.com/anthropics/claude-for-hiring",
    // forge-web
    "github.com:anthropics/forge-web",
    "github.com/anthropics/forge-web",
    // infra-manifests
    "github.com:anthropics/infra-manifests",
    "github.com/anthropics/infra-manifests",
    // mycro_manifests
    "github.com:anthropics/mycro_manifests",
    "github.com/anthropics/mycro_manifests",
    // mycro_configs
    "github.com:anthropics/mycro_configs",
    "github.com/anthropics/mycro_configs",
    // mobile-apps
    "github.com:anthropics/mobile-apps",
    "github.com/anthropics/mobile-apps",
];
160
// ============================================================================
// Cache for repo classification
// ============================================================================
164
165/// Cache for repo classification result. Primed once per process.
166/// 'internal' = remote matches INTERNAL_MODEL_REPOS allowlist
167/// 'external' = has a remote, not on allowlist (public/open-source repo)
168/// 'none'     = no remote URL (not a git repo, or no remote configured)
169lazy_static::lazy_static! {
170    static ref REPO_CLASS_CACHE: RwLock<Option<RepoClass>> = RwLock::new(None);
171}
172
/// Classification of the current repository with respect to the
/// internal-model allowlist.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum RepoClass {
    /// The remote matches the internal allowlist.
    Internal,
    /// The repository has a remote that is not on the allowlist.
    External,
    /// No remote URL is available.
    None,
}
179
// ============================================================================
// Public Functions
// ============================================================================
183
184/// Get the repo root for attribution operations.
185/// Uses get_cwd() which respects agent worktree overrides,
186/// then resolves to git root to handle `cd subdir` case.
187/// Falls back to get_original_cwd() if git root can't be determined.
188pub fn get_attribution_repo_root() -> String {
189    // TODO: This needs to be connected to the actual cwd module
190    // For now, return current working directory
191    std::env::current_dir()
192        .map(|p| p.to_string_lossy().to_string())
193        .unwrap_or_else(|_| crate::utils::get_original_cwd())
194}
195
196/// Synchronously return the cached repo classification.
197/// Returns None if the async check hasn't run yet.
198pub fn get_repo_class_cached() -> Option<String> {
199    REPO_CLASS_CACHE.read().ok().and_then(|guard| {
200        guard.map(|c| match c {
201            RepoClass::Internal => "internal".to_string(),
202            RepoClass::External => "external".to_string(),
203            RepoClass::None => "none".to_string(),
204        })
205    })
206}
207
208/// Synchronously return the cached result of is_internal_model_repo().
209/// Returns false if the check hasn't run yet (safe default: don't leak).
210pub fn is_internal_model_repo_cached() -> bool {
211    REPO_CLASS_CACHE
212        .read()
213        .ok()
214        .map(|guard| *guard == Some(RepoClass::Internal))
215        .unwrap_or(false)
216}
217
218/// Sanitize a surface key to use public model names.
219/// Converts internal model variants to their public equivalents.
220pub fn sanitize_surface_key(surface_key: &str) -> String {
221    // Split surface key into surface and model parts (e.g., "cli/opus-4-5-fast" -> ["cli", "opus-4-5-fast"])
222    if let Some(slash_index) = surface_key.rfind('/') {
223        let surface = &surface_key[..slash_index];
224        let model = &surface_key[slash_index + 1..];
225        let sanitized_model = sanitize_model_name(model);
226        format!("{}/{}", surface, sanitized_model)
227    } else {
228        surface_key.to_string()
229    }
230}
231
/// Map a (possibly internal) model name to its public equivalent.
///
/// The name is matched by substring against the known model families; the
/// first match wins. Names that match no family become the generic `"claude"`.
pub fn sanitize_model_name(short_name: &str) -> String {
    // Ordered most-specific first: a bare family such as "opus-4" must come
    // after its versioned variants, otherwise it would shadow them.
    const FAMILIES: [(&str, &str); 10] = [
        ("opus-4-6", "claude-opus-4-6"),
        ("opus-4-5", "claude-opus-4-5"),
        ("opus-4-1", "claude-opus-4-1"),
        ("opus-4", "claude-opus-4"),
        ("sonnet-4-6", "claude-sonnet-4-6"),
        ("sonnet-4-5", "claude-sonnet-4-5"),
        ("sonnet-4", "claude-sonnet-4"),
        ("sonnet-3-7", "claude-sonnet-3-7"),
        ("haiku-4-5", "claude-haiku-4-5"),
        ("haiku-3-5", "claude-haiku-3-5"),
    ];

    FAMILIES
        .iter()
        .find(|&&(needle, _)| short_name.contains(needle))
        // Unknown models get a generic name
        .map_or("claude", |&(_, public_name)| public_name)
        .to_string()
}
269
270/// Get the current client surface from environment.
271pub fn get_client_surface() -> String {
272    std::env::var(ai::CODE_ENTRYPOINT)
273        .unwrap_or_else(|_| "cli".to_string())
274}
275
/// Combine a surface and a model name into a surface key.
///
/// The result has the form `"surface/model"` (e.g. `"cli/claude-sonnet"`).
pub fn build_surface_key(surface: &str, model: &str) -> String {
    [surface, model].join("/")
}
281
282/// Compute SHA-256 hash of content.
283pub fn compute_content_hash(content: &str) -> String {
284    let mut hasher = Sha256::new();
285    hasher.update(content.as_bytes());
286    let result = hasher.finalize();
287    hex::encode(result)
288}
289
290/// Normalize file path to relative path from cwd for consistent tracking.
291/// Resolves symlinks to handle /tmp vs /private/tmp on macOS.
292pub fn normalize_file_path(file_path: &str) -> String {
293    let cwd = get_attribution_repo_root();
294    let cwd_path = Path::new(&cwd);
295    let file_path_buf = PathBuf::from(file_path);
296
297    if !file_path_buf.is_absolute() {
298        return file_path.to_string();
299    }
300
301    // Resolve symlinks in both paths for consistent comparison
302    // (e.g., /tmp -> /private/tmp on macOS)
303    let resolved_path = std::fs::read_link(&file_path_buf)
304        .map(|p| PathBuf::from(file_path).join(p))
305        .unwrap_or_else(|_| file_path_buf.clone());
306
307    let resolved_cwd = std::fs::read_link(cwd_path)
308        .map(|p| cwd_path.join(p))
309        .unwrap_or_else(|_| cwd_path.to_path_buf());
310
311    let sep = std::path::MAIN_SEPARATOR;
312    if resolved_path.starts_with(&resolved_cwd) || resolved_path == resolved_cwd {
313        // Normalize to forward slashes so keys match git diff output on Windows
314        return resolved_path
315            .strip_prefix(&resolved_cwd)
316            .map(|p| p.to_string_lossy().replace(sep, "/"))
317            .unwrap_or_else(|_| file_path.to_string());
318    }
319
320    // Fallback: try original comparison
321    if file_path.starts_with(&format!("{}{}", cwd, sep)) || file_path == cwd {
322        return PathBuf::from(file_path)
323            .strip_prefix(&cwd)
324            .map(|p| p.to_string_lossy().replace(sep, "/"))
325            .unwrap_or_else(|_| file_path.to_string());
326    }
327
328    file_path.to_string()
329}
330
331/// Expand a relative path to absolute path.
332pub fn expand_file_path(file_path: &str) -> String {
333    if Path::new(file_path).is_absolute() {
334        file_path.to_string()
335    } else {
336        let repo_root = get_attribution_repo_root();
337        Path::new(&repo_root)
338            .join(file_path)
339            .to_string_lossy()
340            .to_string()
341    }
342}
343
344/// Create an empty attribution state for a new session.
345pub fn create_empty_attribution_state() -> AttributionState {
346    AttributionState {
347        file_states: HashMap::new(),
348        session_baselines: HashMap::new(),
349        surface: get_client_surface(),
350        starting_head_sha: None,
351        prompt_count: 0,
352        prompt_count_at_last_commit: 0,
353        permission_prompt_count: 0,
354        permission_prompt_count_at_last_commit: 0,
355        escape_count: 0,
356        escape_count_at_last_commit: 0,
357    }
358}
359
360/// Track a file modification by Claude.
361/// Called after Edit/Write tool completes.
362pub fn track_file_modification(
363    state: AttributionState,
364    file_path: &str,
365    old_content: &str,
366    new_content: &str,
367    _user_modified: bool,
368    mtime: Option<u64>,
369) -> AttributionState {
370    let normalized_path = normalize_file_path(file_path);
371    let mtime = mtime.unwrap_or_else(current_timestamp);
372
373    let new_file_state = compute_file_modification_state(
374        &state.file_states,
375        file_path,
376        old_content,
377        new_content,
378        mtime,
379    );
380
381    if new_file_state.is_none() {
382        return state;
383    }
384
385    let mut new_file_states = state.file_states.clone();
386    new_file_states.insert(normalized_path, new_file_state.unwrap());
387
388    AttributionState {
389        file_states: new_file_states,
390        ..state
391    }
392}
393
394/// Track a file creation by Claude (e.g., via bash command).
395/// Used when Claude creates a new file through a non-tracked mechanism.
396pub fn track_file_creation(
397    state: AttributionState,
398    file_path: &str,
399    content: &str,
400    mtime: Option<u64>,
401) -> AttributionState {
402    // A creation is simply a modification from empty to the new content
403    track_file_modification(state, file_path, "", content, false, mtime)
404}
405
406/// Track a file deletion by Claude (e.g., via bash rm command).
407/// Used when Claude deletes a file through a non-tracked mechanism.
408pub fn track_file_deletion(
409    state: AttributionState,
410    file_path: &str,
411    old_content: &str,
412) -> AttributionState {
413    let normalized_path = normalize_file_path(file_path);
414    let existing_state = state.file_states.get(&normalized_path);
415    let existing_contribution = existing_state.map(|s| s.claude_contribution).unwrap_or(0);
416    let deleted_chars = old_content.len() as u64;
417
418    let new_file_state = FileAttributionState {
419        content_hash: String::new(), // Empty hash for deleted files
420        claude_contribution: existing_contribution + deleted_chars,
421        mtime: current_timestamp(),
422    };
423
424    let mut new_file_states = state.file_states.clone();
425    new_file_states.insert(normalized_path, new_file_state);
426
427    AttributionState {
428        file_states: new_file_states,
429        ..state
430    }
431}
432
433/// Track multiple file changes in bulk.
434pub fn track_bulk_file_changes(
435    state: AttributionState,
436    changes: Vec<FileChange>,
437) -> AttributionState {
438    // Create ONE copy of the HashMap, then mutate it for each file
439    let mut new_file_states = state.file_states.clone();
440
441    for change in changes {
442        let mtime = change.mtime.unwrap_or_else(current_timestamp);
443        if change.change_type == FileChangeType::Deleted {
444            let normalized_path = normalize_file_path(&change.path);
445            let existing_state = new_file_states.get(&normalized_path);
446            let existing_contribution = existing_state.map(|s| s.claude_contribution).unwrap_or(0);
447            let deleted_chars = change.old_content.len() as u64;
448
449            new_file_states.insert(
450                normalized_path,
451                FileAttributionState {
452                    content_hash: String::new(),
453                    claude_contribution: existing_contribution + deleted_chars,
454                    mtime,
455                },
456            );
457        } else {
458            let new_file_state = compute_file_modification_state(
459                &new_file_states,
460                &change.path,
461                &change.old_content,
462                &change.new_content,
463                mtime,
464            );
465            if let Some(file_state) = new_file_state {
466                let normalized_path = normalize_file_path(&change.path);
467                new_file_states.insert(normalized_path, file_state);
468            }
469        }
470    }
471
472    AttributionState {
473        file_states: new_file_states,
474        ..state
475    }
476}
477
478/// File change for bulk tracking.
479#[derive(Debug, Clone)]
480pub struct FileChange {
481    pub path: String,
482    pub change_type: FileChangeType,
483    pub old_content: String,
484    pub new_content: String,
485    pub mtime: Option<u64>,
486}
487
/// Kind of change described by a `FileChange`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileChangeType {
    /// An existing file was edited.
    Modified,
    /// A new file was created.
    Created,
    /// A file was removed.
    Deleted,
}
494
495/// Convert attribution state to snapshot message for persistence.
496pub fn state_to_snapshot_message(
497    state: &AttributionState,
498    message_id: &str,
499) -> AttributionSnapshotMessage {
500    AttributionSnapshotMessage {
501        message_type: "attribution-snapshot".to_string(),
502        message_id: message_id.to_string(),
503        surface: state.surface.clone(),
504        file_states: state.file_states.clone(),
505        prompt_count: state.prompt_count,
506        prompt_count_at_last_commit: state.prompt_count_at_last_commit,
507        permission_prompt_count: state.permission_prompt_count,
508        permission_prompt_count_at_last_commit: state.permission_prompt_count_at_last_commit,
509        escape_count: state.escape_count,
510        escape_count_at_last_commit: state.escape_count_at_last_commit,
511    }
512}
513
514/// Restore attribution state from snapshot messages.
515pub fn restore_attribution_state_from_snapshots(
516    snapshots: &[AttributionSnapshotMessage],
517) -> AttributionState {
518    let mut state = create_empty_attribution_state();
519
520    // Snapshots are full-state dumps, not deltas.
521    // The last snapshot has the most recent count for every path.
522    let Some(last_snapshot) = snapshots.last() else {
523        return state;
524    };
525
526    state.surface = last_snapshot.surface.clone();
527    state.file_states = last_snapshot.file_states.clone();
528
529    // Restore prompt counts from the last snapshot (most recent state)
530    state.prompt_count = last_snapshot.prompt_count;
531    state.prompt_count_at_last_commit = last_snapshot.prompt_count_at_last_commit;
532    state.permission_prompt_count = last_snapshot.permission_prompt_count;
533    state.permission_prompt_count_at_last_commit =
534        last_snapshot.permission_prompt_count_at_last_commit;
535    state.escape_count = last_snapshot.escape_count;
536    state.escape_count_at_last_commit = last_snapshot.escape_count_at_last_commit;
537
538    state
539}
540
541/// Restore attribution state from log snapshots on session resume.
542pub fn attribution_restore_state_from_log<F>(
543    attribution_snapshots: Vec<AttributionSnapshotMessage>,
544    on_update_state: F,
545) where
546    F: Fn(AttributionState),
547{
548    let state = restore_attribution_state_from_snapshots(&attribution_snapshots);
549    on_update_state(state);
550}
551
552/// Increment promptCount and save an attribution snapshot.
553/// Used to persist the prompt count across compaction.
554pub fn increment_prompt_count(
555    attribution: AttributionState,
556    save_snapshot: impl Fn(AttributionSnapshotMessage),
557) -> AttributionState {
558    let new_attribution = AttributionState {
559        prompt_count: attribution.prompt_count + 1,
560        ..attribution
561    };
562    let snapshot = state_to_snapshot_message(&new_attribution, &uuid::Uuid::new_v4().to_string());
563    save_snapshot(snapshot);
564    new_attribution
565}
566
// ============================================================================
// Private Functions
// ============================================================================
570
571/// Compute the character contribution for a file modification.
572/// Returns the FileAttributionState to store, or None if tracking failed.
573fn compute_file_modification_state(
574    existing_file_states: &HashMap<String, FileAttributionState>,
575    file_path: &str,
576    old_content: &str,
577    new_content: &str,
578    mtime: u64,
579) -> Option<FileAttributionState> {
580    let normalized_path = normalize_file_path(file_path);
581
582    // Calculate Claude's character contribution
583    let claude_contribution: u64;
584
585    if old_content.is_empty() || new_content.is_empty() {
586        // New file or full deletion - contribution is the content length
587        claude_contribution = if old_content.is_empty() {
588            new_content.len() as u64
589        } else {
590            old_content.len() as u64
591        };
592    } else {
593        // Find actual changed region via common prefix/suffix matching.
594        // This correctly handles same-length replacements (e.g., "Esc" -> "esc")
595        // where Math.abs(newLen - oldLen) would be 0.
596        let min_len = old_content.len().min(new_content.len());
597        let mut prefix_end = 0;
598        while prefix_end < min_len
599            && old_content.as_bytes()[prefix_end] == new_content.as_bytes()[prefix_end]
600        {
601            prefix_end += 1;
602        }
603
604        let mut suffix_len = 0;
605        while suffix_len < min_len - prefix_end
606            && old_content.as_bytes()[old_content.len() - 1 - suffix_len]
607                == new_content.as_bytes()[new_content.len() - 1 - suffix_len]
608        {
609            suffix_len += 1;
610        }
611
612        let old_changed_len = old_content.len() - prefix_end - suffix_len;
613        let new_changed_len = new_content.len() - prefix_end - suffix_len;
614        claude_contribution = old_changed_len.max(new_changed_len) as u64;
615    }
616
617    // Get current file state if it exists
618    let existing_contribution = existing_file_states
619        .get(&normalized_path)
620        .map(|s| s.claude_contribution)
621        .unwrap_or(0);
622
623    Some(FileAttributionState {
624        content_hash: compute_content_hash(new_content),
625        claude_contribution: existing_contribution + claude_contribution,
626        mtime,
627    })
628}
629
/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch.
fn current_timestamp() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}
637
// ============================================================================
// Async Functions (placeholders - would need async runtime integration)
// ============================================================================
641
642/// Check if the current repo is in the allowlist for internal model names.
643/// This is a placeholder - would need proper async integration.
644pub async fn is_internal_model_repo() -> bool {
645    // Check cache first
646    if let Some(class) = REPO_CLASS_CACHE.read().ok().and_then(|g| *g) {
647        return class == RepoClass::Internal;
648    }
649
650    let cwd = get_attribution_repo_root();
651
652    // TODO: Implement actual async check with get_remote_url_for_dir
653    // For now, return false (safe default: don't leak)
654    let _ = cwd;
655    false
656}
657
658/// Get a file's modification time (mtimeMs), falling back to Date.now() if
659/// the file doesn't exist.
660pub async fn get_file_mtime(file_path: &str) -> u64 {
661    let normalized_path = normalize_file_path(file_path);
662    let abs_path = expand_file_path(&normalized_path);
663
664    std::fs::metadata(&abs_path)
665        .and_then(|m| m.modified())
666        .ok()
667        .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
668        .map(|d| d.as_millis() as u64)
669        .unwrap_or_else(current_timestamp)
670}
671
672/// Calculate final attribution for staged files.
673/// Compares session baseline to committed state.
674pub async fn calculate_commit_attribution(
675    states: Vec<AttributionState>,
676    staged_files: Vec<String>,
677) -> AttributionData {
678    let cwd = get_attribution_repo_root();
679    // TODO: Get actual session ID
680    let session_id = uuid::Uuid::new_v4().to_string();
681
682    let mut files: HashMap<String, FileAttribution> = HashMap::new();
683    let mut excluded_generated: Vec<String> = Vec::new();
684    let mut surfaces: HashSet<String> = HashSet::new();
685    let mut surface_counts: HashMap<String, u64> = HashMap::new();
686
687    let mut total_claude_chars: u64 = 0;
688    let mut total_human_chars: u64 = 0;
689
690    // Merge file states from all sessions
691    let mut merged_file_states: HashMap<String, FileAttributionState> = HashMap::new();
692    let mut merged_baselines: HashMap<String, SessionBaseline> = HashMap::new();
693
694    for state in &states {
695        surfaces.insert(state.surface.clone());
696
697        // Merge baselines (earliest baseline wins)
698        for (path, baseline) in &state.session_baselines {
699            if !merged_baselines.contains_key(path) {
700                merged_baselines.insert(path.clone(), baseline.clone());
701            }
702        }
703
704        // Merge file states (accumulate contributions)
705        for (path, file_state) in &state.file_states {
706            if let Some(existing) = merged_file_states.get(path) {
707                merged_file_states.insert(
708                    path.clone(),
709                    FileAttributionState {
710                        content_hash: file_state.content_hash.clone(),
711                        claude_contribution: existing.claude_contribution
712                            + file_state.claude_contribution,
713                        mtime: file_state.mtime,
714                    },
715                );
716            } else {
717                merged_file_states.insert(path.clone(), file_state.clone());
718            }
719        }
720    }
721
722    // Process each staged file
723    for file in staged_files {
724        // Skip generated files (placeholder - would need is_generated_file)
725        // if is_generated_file(&file) {
726        //     excluded_generated.push(file.clone());
727        //     continue;
728        // }
729
730        let abs_path = PathBuf::from(&cwd).join(&file);
731        let file_state = merged_file_states.get(&file);
732        let baseline = merged_baselines.get(&file);
733
734        // Get the surface for this file
735        let file_surface = states
736            .first()
737            .map(|s| s.surface.clone())
738            .unwrap_or_else(get_client_surface);
739
740        let (mut claude_chars, mut human_chars) = (0u64, 0u64);
741
742        // Check if file was deleted (placeholder - would need is_file_deleted)
743        // For now, check if file exists
744        let deleted = !abs_path.exists();
745
746        if deleted {
747            // File was deleted
748            if let Some(state) = file_state {
749                claude_chars = state.claude_contribution;
750                human_chars = 0;
751            } else {
752                // Human deleted this file - use diff size estimation
753                human_chars = 100; // Minimum attribution for a deletion
754            }
755        } else {
756            // File exists - use file size as proxy for char count
757            if let Ok(stats) = std::fs::metadata(&abs_path) {
758                if file_state.is_some() {
759                    // We have tracked modifications for this file
760                    claude_chars = file_state.map(|s| s.claude_contribution).unwrap_or(0);
761                    human_chars = 0;
762                } else if baseline.is_some() {
763                    // File was modified but not tracked - human modification
764                    human_chars = stats.len() as u64;
765                } else {
766                    // New file not created by Claude
767                    human_chars = stats.len() as u64;
768                }
769            }
770        }
771
772        // Ensure non-negative values
773        claude_chars = claude_chars.max(0);
774        human_chars = human_chars.max(0);
775
776        let total = claude_chars + human_chars;
777        let percent = if total > 0 {
778            ((claude_chars as f64 / total as f64) * 100.0).round() as u32
779        } else {
780            0
781        };
782
783        files.insert(
784            file.clone(),
785            FileAttribution {
786                claude_chars,
787                human_chars,
788                percent,
789                surface: file_surface.clone(),
790            },
791        );
792
793        total_claude_chars += claude_chars;
794        total_human_chars += human_chars;
795
796        *surface_counts.entry(file_surface).or_insert(0) += claude_chars;
797    }
798
799    let total_chars = total_claude_chars + total_human_chars;
800    let claude_percent = if total_chars > 0 {
801        ((total_claude_chars as f64 / total_chars as f64) * 100.0).round() as u32
802    } else {
803        0
804    };
805
806    // Calculate surface breakdown (percentage of total content per surface)
807    let mut surface_breakdown: HashMap<String, SurfaceBreakdown> = HashMap::new();
808    for (surface, chars) in surface_counts {
809        let percent = if total_chars > 0 {
810            ((chars as f64 / total_chars as f64) * 100.0).round() as u32
811        } else {
812            0
813        };
814        surface_breakdown.insert(
815            surface,
816            SurfaceBreakdown {
817                claude_chars: chars,
818                percent,
819            },
820        );
821    }
822
823    AttributionData {
824        version: 1,
825        summary: AttributionSummary {
826            claude_percent,
827            claude_chars: total_claude_chars,
828            human_chars: total_human_chars,
829            surfaces: surfaces.into_iter().collect(),
830        },
831        files,
832        surface_breakdown,
833        excluded_generated,
834        sessions: vec![session_id],
835    }
836}
837
/// List the files currently staged in git.
///
/// Placeholder: the git integration is not implemented yet, so the result
/// is always empty.
pub async fn get_staged_files() -> Vec<String> {
    // TODO: Implement with actual git command
    // For now, return empty
    let staged: Vec<String> = Vec::new();
    staged
}
844
/// Unit tests for the synchronous attribution helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sanitize_model_name() {
        assert_eq!(sanitize_model_name("opus-4-5-fast"), "claude-opus-4-5");
        assert_eq!(sanitize_model_name("sonnet-4"), "claude-sonnet-4");
        assert_eq!(sanitize_model_name("unknown"), "claude");

        // A bare family name must not shadow its more specific variants.
        assert_eq!(sanitize_model_name("opus-4-1"), "claude-opus-4-1");
        assert_eq!(sanitize_model_name("opus-4"), "claude-opus-4");
        assert_eq!(sanitize_model_name("sonnet-4-6"), "claude-sonnet-4-6");
    }

    #[test]
    fn test_sanitize_surface_key() {
        assert_eq!(
            sanitize_surface_key("cli/opus-4-5-fast"),
            "cli/claude-opus-4-5"
        );
        assert_eq!(sanitize_surface_key("cli"), "cli");
    }

    #[test]
    fn test_compute_content_hash() {
        let hash1 = compute_content_hash("hello");
        let hash2 = compute_content_hash("hello");
        let hash3 = compute_content_hash("world");
        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);

        // A SHA-256 digest is 32 bytes, i.e. 64 hex characters.
        assert_eq!(hash1.len(), 64);
        assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_normalize_file_path() {
        // Test relative path stays relative
        assert_eq!(normalize_file_path("test.rs"), "test.rs");

        // An absolute path under the current dir is made relative to it
        let abs_path = std::env::current_dir()
            .unwrap()
            .join("test.rs")
            .to_string_lossy()
            .to_string();
        assert_eq!(normalize_file_path(&abs_path), "test.rs");
    }

    #[test]
    fn test_create_empty_attribution_state() {
        let state = create_empty_attribution_state();
        assert!(state.file_states.is_empty());
        assert!(state.session_baselines.is_empty());
        assert!(state.starting_head_sha.is_none());
        assert_eq!(state.prompt_count, 0);
    }

    #[test]
    fn test_track_file_creation() {
        let state = create_empty_attribution_state();
        let state = track_file_creation(state, "test.rs", "fn main() {}", Some(42));

        let file_state = &state.file_states["test.rs"];
        assert_eq!(file_state.claude_contribution, 12);
        assert_eq!(file_state.content_hash, compute_content_hash("fn main() {}"));
        assert_eq!(file_state.mtime, 42);
    }

    #[test]
    fn test_track_file_modification() {
        let state = create_empty_attribution_state();
        let state = track_file_modification(state, "test.rs", "", "fn main() {}", false, None);
        assert_eq!(state.file_states["test.rs"].claude_contribution, 12);

        // A same-length replacement still counts the changed region.
        let state = track_file_modification(state, "keys.rs", "Esc", "esc", false, None);
        assert_eq!(state.file_states["keys.rs"].claude_contribution, 1);
    }

    #[test]
    fn test_track_file_deletion() {
        let state = create_empty_attribution_state();
        let state = track_file_creation(state, "gone.rs", "fn main() {}", None);
        let state = track_file_deletion(state, "gone.rs", "fn main() {}");

        let file_state = &state.file_states["gone.rs"];
        assert_eq!(file_state.claude_contribution, 24);
        assert!(file_state.content_hash.is_empty());
    }

    #[test]
    fn test_track_bulk_file_changes() {
        let changes = vec![
            FileChange {
                path: "a.rs".to_string(),
                change_type: FileChangeType::Created,
                old_content: String::new(),
                new_content: "abc".to_string(),
                mtime: Some(1),
            },
            FileChange {
                path: "b.rs".to_string(),
                change_type: FileChangeType::Deleted,
                old_content: "xy".to_string(),
                new_content: String::new(),
                mtime: Some(2),
            },
        ];
        let state = track_bulk_file_changes(create_empty_attribution_state(), changes);

        assert_eq!(state.file_states["a.rs"].claude_contribution, 3);
        assert_eq!(state.file_states["a.rs"].mtime, 1);
        assert_eq!(state.file_states["b.rs"].claude_contribution, 2);
        assert!(state.file_states["b.rs"].content_hash.is_empty());
    }

    #[test]
    fn test_restore_from_no_snapshots_is_empty() {
        let state = restore_attribution_state_from_snapshots(&[]);
        assert!(state.file_states.is_empty());
        assert_eq!(state.prompt_count, 0);
    }
}
905        let state = create_empty_attribution_state();
906        let state = track_file_modification(state, "test.rs", "", "fn main() {}", false, None);
907        assert!(state.file_states.contains_key("test.rs"));
908    }
909}