Skip to main content

ai_agent/utils/
commit_attribution.rs

1//! Commit attribution utilities for tracking Claude's contributions.
2//!
3//! This module provides functionality to track and attribute file changes
4//! to Claude or human contributors for git commit attribution.
5
6use crate::constants::env::ai;
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9use std::collections::{HashMap, HashSet};
10use std::path::{Path, PathBuf};
11use std::sync::RwLock;
12use std::time::{SystemTime, UNIX_EPOCH};
13
14// ============================================================================
15// Types
16// ============================================================================
17
/// Attribution state for tracking Claude's contributions to files.
///
/// The tracking helpers in this module take a state by value and return the
/// updated state. Only part of it is persisted in snapshots: `session_baselines`
/// and `starting_head_sha` are not copied into `AttributionSnapshotMessage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttributionState {
    /// File states keyed by relative path (from cwd)
    pub file_states: HashMap<String, FileAttributionState>,
    /// Session baseline states for net change calculation
    pub session_baselines: HashMap<String, SessionBaseline>,
    /// Surface from which edits were made
    pub surface: String,
    /// HEAD SHA at session start (for detecting external commits)
    pub starting_head_sha: Option<String>,
    /// Total prompts in session (for steer count calculation)
    pub prompt_count: u32,
    /// Prompts at last commit (to calculate steers for current commit)
    pub prompt_count_at_last_commit: u32,
    /// Permission prompt tracking
    pub permission_prompt_count: u32,
    /// Presumably the value of `permission_prompt_count` at the last commit,
    /// mirroring `prompt_count_at_last_commit` - TODO confirm against callers.
    pub permission_prompt_count_at_last_commit: u32,
    /// ESC press tracking (user cancelled permission prompt)
    pub escape_count: u32,
    /// Presumably the value of `escape_count` at the last commit - TODO confirm.
    pub escape_count_at_last_commit: u32,
}
40
/// Per-file attribution state.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FileAttributionState {
    /// Hex-encoded SHA-256 of the file content after the most recent tracked
    /// change; the empty string once the file has been tracked as deleted.
    pub content_hash: String,
    /// Running total of content attributed to Claude for this file, measured in
    /// bytes of the UTF-8 content (it is accumulated from `str::len`).
    pub claude_contribution: u64,
    /// Modification time in milliseconds since the Unix epoch.
    pub mtime: u64,
}
48
/// Session baseline for tracking file state at session start.
///
/// NOTE(review): nothing in the visible part of this module creates a baseline,
/// so the field semantics below are inferred from `FileAttributionState`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionBaseline {
    /// Hash of the file content at session start (presumably hex SHA-256, as
    /// produced by `compute_content_hash` - TODO confirm).
    pub content_hash: String,
    /// Modification time at session start (presumably milliseconds since the
    /// Unix epoch - TODO confirm).
    pub mtime: u64,
}
55
/// Summary of Claude's contribution for a commit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttributionSummary {
    /// `claude_chars` as a rounded percentage of `claude_chars + human_chars`
    /// (0 when both are 0).
    pub claude_percent: u32,
    /// Total contribution attributed to Claude across all staged files.
    pub claude_chars: u64,
    /// Total contribution attributed to the human across all staged files.
    pub human_chars: u64,
    /// Distinct surfaces of the sessions that were merged into this summary.
    pub surfaces: Vec<String>,
}
64
/// Per-file attribution details for git notes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileAttribution {
    /// Contribution attributed to Claude for this file.
    pub claude_chars: u64,
    /// Contribution attributed to the human for this file.
    pub human_chars: u64,
    /// `claude_chars` as a rounded percentage of `claude_chars + human_chars`
    /// (0 when both are 0).
    pub percent: u32,
    /// Surface credited for this file.
    pub surface: String,
}
73
/// Full attribution data for git notes JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttributionData {
    /// Format version; `calculate_commit_attribution` writes `1`.
    pub version: u32,
    /// Totals across every staged file.
    pub summary: AttributionSummary,
    /// Per-file details keyed by the staged file path.
    pub files: HashMap<String, FileAttribution>,
    /// Claude's contribution grouped by surface.
    pub surface_breakdown: HashMap<String, SurfaceBreakdown>,
    /// Paths skipped because they are generated files.
    pub excluded_generated: Vec<String>,
    /// Session identifiers that contributed to this commit.
    pub sessions: Vec<String>,
}
84
/// Surface breakdown for attribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SurfaceBreakdown {
    /// Contribution attributed to Claude through this surface.
    pub claude_chars: u64,
    /// `claude_chars` as a rounded percentage of the total (Claude + human)
    /// contribution across all staged files.
    pub percent: u32,
}
91
/// Attribution snapshot message for persistence.
///
/// A snapshot is a full dump of the persisted part of `AttributionState`, not a
/// delta, so the most recent snapshot alone is enough to restore a session.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttributionSnapshotMessage {
    /// Message discriminator, serialized under the key `type`.
    /// `state_to_snapshot_message` sets it to `"attribution-snapshot"`.
    #[serde(rename = "type")]
    pub message_type: String,
    /// Identifier of this snapshot message.
    pub message_id: String,
    /// Copy of `AttributionState::surface`.
    pub surface: String,
    /// Copy of `AttributionState::file_states`.
    pub file_states: HashMap<String, FileAttributionState>,
    /// Copy of `AttributionState::prompt_count`.
    pub prompt_count: u32,
    /// Copy of `AttributionState::prompt_count_at_last_commit`.
    pub prompt_count_at_last_commit: u32,
    /// Copy of `AttributionState::permission_prompt_count`.
    pub permission_prompt_count: u32,
    /// Copy of `AttributionState::permission_prompt_count_at_last_commit`.
    pub permission_prompt_count_at_last_commit: u32,
    /// Copy of `AttributionState::escape_count`.
    pub escape_count: u32,
    /// Copy of `AttributionState::escape_count_at_last_commit`.
    pub escape_count_at_last_commit: u32,
}
107
108// ============================================================================
109// Constants
110// ============================================================================
111
/// List of repos where internal model names are allowed in trailers.
/// Includes both SSH and HTTPS URL formats.
///
/// Every repository appears twice: `github.com:org/repo` (the form found in
/// SSH remotes) and `github.com/org/repo` (the form found in HTTPS remotes).
///
/// NOTE(review): nothing in the visible part of this module reads the list yet;
/// entries look like fragments meant to be matched against a remote URL -
/// confirm the matching strategy when `is_internal_model_repo` is implemented.
const INTERNAL_MODEL_REPOS: &[&str] = &[
    "github.com:anthropics/claude-cli-internal",
    "github.com/anthropics/claude-cli-internal",
    "github.com:anthropics/anthropic",
    "github.com/anthropics/anthropic",
    "github.com:anthropics/apps",
    "github.com/anthropics/apps",
    "github.com:anthropics/casino",
    "github.com/anthropics/casino",
    "github.com:anthropics/dbt",
    "github.com/anthropics/dbt",
    "github.com:anthropics/dotfiles",
    "github.com/anthropics/dotfiles",
    "github.com:anthropics/terraform-config",
    "github.com/anthropics/terraform-config",
    "github.com:anthropics/hex-export",
    "github.com/anthropics/hex-export",
    "github.com:anthropics/feedback-v2",
    "github.com/anthropics/feedback-v2",
    "github.com:anthropics/labs",
    "github.com/anthropics/labs",
    "github.com:anthropics/argo-rollouts",
    "github.com/anthropics/argo-rollouts",
    "github.com:anthropics/starling-configs",
    "github.com/anthropics/starling-configs",
    "github.com:anthropics/ts-tools",
    "github.com/anthropics/ts-tools",
    "github.com:anthropics/ts-capsules",
    "github.com/anthropics/ts-capsules",
    "github.com:anthropics/feldspar-testing",
    "github.com/anthropics/feldspar-testing",
    "github.com:anthropics/trellis",
    "github.com/anthropics/trellis",
    "github.com:anthropics/claude-for-hiring",
    "github.com/anthropics/claude-for-hiring",
    "github.com:anthropics/forge-web",
    "github.com/anthropics/forge-web",
    "github.com:anthropics/infra-manifests",
    "github.com/anthropics/infra-manifests",
    "github.com:anthropics/mycro_manifests",
    "github.com/anthropics/mycro_manifests",
    "github.com:anthropics/mycro_configs",
    "github.com/anthropics/mycro_configs",
    "github.com:anthropics/mobile-apps",
    "github.com/anthropics/mobile-apps",
];
160
161// ============================================================================
162// Cache for repo classification
163// ============================================================================
164
/// Process-wide cache of the repo classification.
///
/// Holds `None` until a classification has been stored. `RwLock::new` is a
/// `const fn`, so a plain `static` is enough and no lazy-initialisation macro
/// is needed.
static REPO_CLASS_CACHE: RwLock<Option<RepoClass>> = RwLock::new(None);

/// Classification of the current repository's remote.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RepoClass {
    /// Remote matches the `INTERNAL_MODEL_REPOS` allowlist.
    Internal,
    /// Has a remote that is not on the allowlist (public/open-source repo).
    External,
    /// No remote URL (not a git repo, or no remote configured).
    None,
}
179
180// ============================================================================
181// Public Functions
182// ============================================================================
183
184/// Get the repo root for attribution operations.
185/// Uses get_cwd() which respects agent worktree overrides,
186/// then resolves to git root to handle `cd subdir` case.
187/// Falls back to get_original_cwd() if git root can't be determined.
188pub fn get_attribution_repo_root() -> String {
189    std::env::current_dir()
190        .map(|p| p.to_string_lossy().to_string())
191        .unwrap_or_else(|_| {
192            crate::utils::get_original_cwd()
193                .to_string_lossy()
194                .to_string()
195        })
196}
197
198/// Synchronously return the cached repo classification.
199/// Returns None if the async check hasn't run yet.
200pub fn get_repo_class_cached() -> Option<String> {
201    REPO_CLASS_CACHE.read().ok().and_then(|guard| {
202        guard.map(|c| match c {
203            RepoClass::Internal => "internal".to_string(),
204            RepoClass::External => "external".to_string(),
205            RepoClass::None => "none".to_string(),
206        })
207    })
208}
209
210/// Synchronously return the cached result of is_internal_model_repo().
211/// Returns false if the check hasn't run yet (safe default: don't leak).
212pub fn is_internal_model_repo_cached() -> bool {
213    REPO_CLASS_CACHE
214        .read()
215        .ok()
216        .map(|guard| *guard == Some(RepoClass::Internal))
217        .unwrap_or(false)
218}
219
220/// Sanitize a surface key to use public model names.
221/// Converts internal model variants to their public equivalents.
222pub fn sanitize_surface_key(surface_key: &str) -> String {
223    // Split surface key into surface and model parts (e.g., "cli/opus-4-5-fast" -> ["cli", "opus-4-5-fast"])
224    if let Some(slash_index) = surface_key.rfind('/') {
225        let surface = &surface_key[..slash_index];
226        let model = &surface_key[slash_index + 1..];
227        let sanitized_model = sanitize_model_name(model);
228        format!("{}/{}", surface, sanitized_model)
229    } else {
230        surface_key.to_string()
231    }
232}
233
/// Map a (possibly internal) model name onto its public model-family name.
///
/// The first table entry whose needle occurs anywhere in `short_name` wins.
/// Names that match no entry collapse to the generic `"claude"`.
pub fn sanitize_model_name(short_name: &str) -> String {
    // Ordered (needle, public name) pairs. Versioned needles come before the
    // bare family needle so that "opus-4-5" is not swallowed by "opus-4".
    const PUBLIC_NAMES: [(&str, &str); 10] = [
        ("opus-4-6", "claude-opus-4-6"),
        ("opus-4-5", "claude-opus-4-5"),
        ("opus-4-1", "claude-opus-4-1"),
        ("opus-4", "claude-opus-4"),
        ("sonnet-4-6", "claude-sonnet-4-6"),
        ("sonnet-4-5", "claude-sonnet-4-5"),
        ("sonnet-4", "claude-sonnet-4"),
        ("sonnet-3-7", "claude-sonnet-3-7"),
        ("haiku-4-5", "claude-haiku-4-5"),
        ("haiku-3-5", "claude-haiku-3-5"),
    ];

    PUBLIC_NAMES
        .iter()
        .find(|(needle, _)| short_name.contains(*needle))
        .map_or("claude", |(_, public_name)| *public_name)
        .to_string()
}
271
272/// Get the current client surface from environment.
273pub fn get_client_surface() -> String {
274    std::env::var(ai::CODE_ENTRYPOINT).unwrap_or_else(|_| "cli".to_string())
275}
276
/// Join a surface and a model name into a `"surface/model"` key
/// (e.g. `"cli/claude-sonnet"`). Neither part is validated or escaped.
pub fn build_surface_key(surface: &str, model: &str) -> String {
    [surface, model].join("/")
}
282
283/// Compute SHA-256 hash of content.
284pub fn compute_content_hash(content: &str) -> String {
285    let mut hasher = Sha256::new();
286    hasher.update(content.as_bytes());
287    let result = hasher.finalize();
288    hex::encode(result)
289}
290
291/// Normalize file path to relative path from cwd for consistent tracking.
292/// Resolves symlinks to handle /tmp vs /private/tmp on macOS.
293pub fn normalize_file_path(file_path: &str) -> String {
294    let cwd = get_attribution_repo_root();
295    let cwd_path = Path::new(&cwd);
296    let file_path_buf = PathBuf::from(file_path);
297
298    if !file_path_buf.is_absolute() {
299        return file_path.to_string();
300    }
301
302    // Resolve symlinks in both paths for consistent comparison
303    // (e.g., /tmp -> /private/tmp on macOS)
304    let resolved_path = std::fs::read_link(&file_path_buf)
305        .map(|p| PathBuf::from(file_path).join(p))
306        .unwrap_or_else(|_| file_path_buf.clone());
307
308    let resolved_cwd = std::fs::read_link(cwd_path)
309        .map(|p| cwd_path.join(p))
310        .unwrap_or_else(|_| cwd_path.to_path_buf());
311
312    let sep = std::path::MAIN_SEPARATOR;
313    if resolved_path.starts_with(&resolved_cwd) || resolved_path == resolved_cwd {
314        // Normalize to forward slashes so keys match git diff output on Windows
315        return resolved_path
316            .strip_prefix(&resolved_cwd)
317            .map(|p| p.to_string_lossy().replace(sep, "/"))
318            .unwrap_or_else(|_| file_path.to_string());
319    }
320
321    // Fallback: try original comparison
322    if file_path.starts_with(&format!("{}{}", cwd, sep)) || file_path == cwd {
323        return PathBuf::from(file_path)
324            .strip_prefix(&cwd)
325            .map(|p| p.to_string_lossy().replace(sep, "/"))
326            .unwrap_or_else(|_| file_path.to_string());
327    }
328
329    file_path.to_string()
330}
331
332/// Expand a relative path to absolute path.
333pub fn expand_file_path(file_path: &str) -> String {
334    if Path::new(file_path).is_absolute() {
335        file_path.to_string()
336    } else {
337        let repo_root = get_attribution_repo_root();
338        Path::new(&repo_root)
339            .join(file_path)
340            .to_string_lossy()
341            .to_string()
342    }
343}
344
345/// Create an empty attribution state for a new session.
346pub fn create_empty_attribution_state() -> AttributionState {
347    AttributionState {
348        file_states: HashMap::new(),
349        session_baselines: HashMap::new(),
350        surface: get_client_surface(),
351        starting_head_sha: None,
352        prompt_count: 0,
353        prompt_count_at_last_commit: 0,
354        permission_prompt_count: 0,
355        permission_prompt_count_at_last_commit: 0,
356        escape_count: 0,
357        escape_count_at_last_commit: 0,
358    }
359}
360
361/// Track a file modification by Claude.
362/// Called after Edit/Write tool completes.
363pub fn track_file_modification(
364    state: AttributionState,
365    file_path: &str,
366    old_content: &str,
367    new_content: &str,
368    _user_modified: bool,
369    mtime: Option<u64>,
370) -> AttributionState {
371    let normalized_path = normalize_file_path(file_path);
372    let mtime = mtime.unwrap_or_else(current_timestamp);
373
374    let new_file_state = compute_file_modification_state(
375        &state.file_states,
376        file_path,
377        old_content,
378        new_content,
379        mtime,
380    );
381
382    if new_file_state.is_none() {
383        return state;
384    }
385
386    let mut new_file_states = state.file_states.clone();
387    new_file_states.insert(normalized_path, new_file_state.unwrap());
388
389    AttributionState {
390        file_states: new_file_states,
391        ..state
392    }
393}
394
395/// Track a file creation by Claude (e.g., via bash command).
396/// Used when Claude creates a new file through a non-tracked mechanism.
397pub fn track_file_creation(
398    state: AttributionState,
399    file_path: &str,
400    content: &str,
401    mtime: Option<u64>,
402) -> AttributionState {
403    // A creation is simply a modification from empty to the new content
404    track_file_modification(state, file_path, "", content, false, mtime)
405}
406
407/// Track a file deletion by Claude (e.g., via bash rm command).
408/// Used when Claude deletes a file through a non-tracked mechanism.
409pub fn track_file_deletion(
410    state: AttributionState,
411    file_path: &str,
412    old_content: &str,
413) -> AttributionState {
414    let normalized_path = normalize_file_path(file_path);
415    let existing_state = state.file_states.get(&normalized_path);
416    let existing_contribution = existing_state.map(|s| s.claude_contribution).unwrap_or(0);
417    let deleted_chars = old_content.len() as u64;
418
419    let new_file_state = FileAttributionState {
420        content_hash: String::new(), // Empty hash for deleted files
421        claude_contribution: existing_contribution + deleted_chars,
422        mtime: current_timestamp(),
423    };
424
425    let mut new_file_states = state.file_states.clone();
426    new_file_states.insert(normalized_path, new_file_state);
427
428    AttributionState {
429        file_states: new_file_states,
430        ..state
431    }
432}
433
434/// Track multiple file changes in bulk.
435pub fn track_bulk_file_changes(
436    state: AttributionState,
437    changes: Vec<FileChange>,
438) -> AttributionState {
439    // Create ONE copy of the HashMap, then mutate it for each file
440    let mut new_file_states = state.file_states.clone();
441
442    for change in changes {
443        let mtime = change.mtime.unwrap_or_else(current_timestamp);
444        if change.change_type == FileChangeType::Deleted {
445            let normalized_path = normalize_file_path(&change.path);
446            let existing_state = new_file_states.get(&normalized_path);
447            let existing_contribution = existing_state.map(|s| s.claude_contribution).unwrap_or(0);
448            let deleted_chars = change.old_content.len() as u64;
449
450            new_file_states.insert(
451                normalized_path,
452                FileAttributionState {
453                    content_hash: String::new(),
454                    claude_contribution: existing_contribution + deleted_chars,
455                    mtime,
456                },
457            );
458        } else {
459            let new_file_state = compute_file_modification_state(
460                &new_file_states,
461                &change.path,
462                &change.old_content,
463                &change.new_content,
464                mtime,
465            );
466            if let Some(file_state) = new_file_state {
467                let normalized_path = normalize_file_path(&change.path);
468                new_file_states.insert(normalized_path, file_state);
469            }
470        }
471    }
472
473    AttributionState {
474        file_states: new_file_states,
475        ..state
476    }
477}
478
/// File change for bulk tracking.
#[derive(Debug, Clone)]
pub struct FileChange {
    /// Path of the changed file; normalized with `normalize_file_path` before
    /// it is used as a tracking key.
    pub path: String,
    /// What happened to the file.
    pub change_type: FileChangeType,
    /// Content before the change. For a deletion its byte length is the amount
    /// attributed to Claude.
    pub old_content: String,
    /// Content after the change. Not read for deletions.
    pub new_content: String,
    /// Modification time in milliseconds since the Unix epoch; the current time
    /// is used when absent.
    pub mtime: Option<u64>,
}
488
/// Kind of change carried by a `FileChange`.
///
/// `track_bulk_file_changes` treats `Modified` and `Created` identically;
/// only `Deleted` takes a separate path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileChangeType {
    /// Existing file whose content changed.
    Modified,
    /// File that did not exist before.
    Created,
    /// File that was removed.
    Deleted,
}
495
496/// Convert attribution state to snapshot message for persistence.
497pub fn state_to_snapshot_message(
498    state: &AttributionState,
499    message_id: &str,
500) -> AttributionSnapshotMessage {
501    AttributionSnapshotMessage {
502        message_type: "attribution-snapshot".to_string(),
503        message_id: message_id.to_string(),
504        surface: state.surface.clone(),
505        file_states: state.file_states.clone(),
506        prompt_count: state.prompt_count,
507        prompt_count_at_last_commit: state.prompt_count_at_last_commit,
508        permission_prompt_count: state.permission_prompt_count,
509        permission_prompt_count_at_last_commit: state.permission_prompt_count_at_last_commit,
510        escape_count: state.escape_count,
511        escape_count_at_last_commit: state.escape_count_at_last_commit,
512    }
513}
514
515/// Restore attribution state from snapshot messages.
516pub fn restore_attribution_state_from_snapshots(
517    snapshots: &[AttributionSnapshotMessage],
518) -> AttributionState {
519    let mut state = create_empty_attribution_state();
520
521    // Snapshots are full-state dumps, not deltas.
522    // The last snapshot has the most recent count for every path.
523    let Some(last_snapshot) = snapshots.last() else {
524        return state;
525    };
526
527    state.surface = last_snapshot.surface.clone();
528    state.file_states = last_snapshot.file_states.clone();
529
530    // Restore prompt counts from the last snapshot (most recent state)
531    state.prompt_count = last_snapshot.prompt_count;
532    state.prompt_count_at_last_commit = last_snapshot.prompt_count_at_last_commit;
533    state.permission_prompt_count = last_snapshot.permission_prompt_count;
534    state.permission_prompt_count_at_last_commit =
535        last_snapshot.permission_prompt_count_at_last_commit;
536    state.escape_count = last_snapshot.escape_count;
537    state.escape_count_at_last_commit = last_snapshot.escape_count_at_last_commit;
538
539    state
540}
541
542/// Restore attribution state from log snapshots on session resume.
543pub fn attribution_restore_state_from_log<F>(
544    attribution_snapshots: Vec<AttributionSnapshotMessage>,
545    on_update_state: F,
546) where
547    F: Fn(AttributionState),
548{
549    let state = restore_attribution_state_from_snapshots(&attribution_snapshots);
550    on_update_state(state);
551}
552
553/// Increment promptCount and save an attribution snapshot.
554/// Used to persist the prompt count across compaction.
555pub fn increment_prompt_count(
556    attribution: AttributionState,
557    save_snapshot: impl Fn(AttributionSnapshotMessage),
558) -> AttributionState {
559    let new_attribution = AttributionState {
560        prompt_count: attribution.prompt_count + 1,
561        ..attribution
562    };
563    let snapshot = state_to_snapshot_message(&new_attribution, &uuid::Uuid::new_v4().to_string());
564    save_snapshot(snapshot);
565    new_attribution
566}
567
568// ============================================================================
569// Private Functions
570// ============================================================================
571
572/// Compute the character contribution for a file modification.
573/// Returns the FileAttributionState to store, or None if tracking failed.
574fn compute_file_modification_state(
575    existing_file_states: &HashMap<String, FileAttributionState>,
576    file_path: &str,
577    old_content: &str,
578    new_content: &str,
579    mtime: u64,
580) -> Option<FileAttributionState> {
581    let normalized_path = normalize_file_path(file_path);
582
583    // Calculate Claude's character contribution
584    let claude_contribution: u64;
585
586    if old_content.is_empty() || new_content.is_empty() {
587        // New file or full deletion - contribution is the content length
588        claude_contribution = if old_content.is_empty() {
589            new_content.len() as u64
590        } else {
591            old_content.len() as u64
592        };
593    } else {
594        // Find actual changed region via common prefix/suffix matching.
595        // This correctly handles same-length replacements (e.g., "Esc" -> "esc")
596        // where Math.abs(newLen - oldLen) would be 0.
597        let min_len = old_content.len().min(new_content.len());
598        let mut prefix_end = 0;
599        while prefix_end < min_len
600            && old_content.as_bytes()[prefix_end] == new_content.as_bytes()[prefix_end]
601        {
602            prefix_end += 1;
603        }
604
605        let mut suffix_len = 0;
606        while suffix_len < min_len - prefix_end
607            && old_content.as_bytes()[old_content.len() - 1 - suffix_len]
608                == new_content.as_bytes()[new_content.len() - 1 - suffix_len]
609        {
610            suffix_len += 1;
611        }
612
613        let old_changed_len = old_content.len() - prefix_end - suffix_len;
614        let new_changed_len = new_content.len() - prefix_end - suffix_len;
615        claude_contribution = old_changed_len.max(new_changed_len) as u64;
616    }
617
618    // Get current file state if it exists
619    let existing_contribution = existing_file_states
620        .get(&normalized_path)
621        .map(|s| s.claude_contribution)
622        .unwrap_or(0);
623
624    Some(FileAttributionState {
625        content_hash: compute_content_hash(new_content),
626        claude_contribution: existing_contribution + claude_contribution,
627        mtime,
628    })
629}
630
/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch.
fn current_timestamp() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
638
639// ============================================================================
640// Async Functions (placeholders - would need async runtime integration)
641// ============================================================================
642
643/// Check if the current repo is in the allowlist for internal model names.
644/// This is a placeholder - would need proper async integration.
645pub async fn is_internal_model_repo() -> bool {
646    // Check cache first
647    if let Some(class) = REPO_CLASS_CACHE.read().ok().and_then(|g| *g) {
648        return class == RepoClass::Internal;
649    }
650
651    let cwd = get_attribution_repo_root();
652
653    // TODO: Implement actual async check with get_remote_url_for_dir
654    // For now, return false (safe default: don't leak)
655    let _ = cwd;
656    false
657}
658
659/// Get a file's modification time (mtimeMs), falling back to Date.now() if
660/// the file doesn't exist.
661pub async fn get_file_mtime(file_path: &str) -> u64 {
662    let normalized_path = normalize_file_path(file_path);
663    let abs_path = expand_file_path(&normalized_path);
664
665    std::fs::metadata(&abs_path)
666        .and_then(|m| m.modified())
667        .ok()
668        .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
669        .map(|d| d.as_millis() as u64)
670        .unwrap_or_else(current_timestamp)
671}
672
673/// Calculate final attribution for staged files.
674/// Compares session baseline to committed state.
675pub async fn calculate_commit_attribution(
676    states: Vec<AttributionState>,
677    staged_files: Vec<String>,
678) -> AttributionData {
679    let cwd = get_attribution_repo_root();
680    // TODO: Get actual session ID
681    let session_id = uuid::Uuid::new_v4().to_string();
682
683    let mut files: HashMap<String, FileAttribution> = HashMap::new();
684    let mut excluded_generated: Vec<String> = Vec::new();
685    let mut surfaces: HashSet<String> = HashSet::new();
686    let mut surface_counts: HashMap<String, u64> = HashMap::new();
687
688    let mut total_claude_chars: u64 = 0;
689    let mut total_human_chars: u64 = 0;
690
691    // Merge file states from all sessions
692    let mut merged_file_states: HashMap<String, FileAttributionState> = HashMap::new();
693    let mut merged_baselines: HashMap<String, SessionBaseline> = HashMap::new();
694
695    for state in &states {
696        surfaces.insert(state.surface.clone());
697
698        // Merge baselines (earliest baseline wins)
699        for (path, baseline) in &state.session_baselines {
700            if !merged_baselines.contains_key(path) {
701                merged_baselines.insert(path.clone(), baseline.clone());
702            }
703        }
704
705        // Merge file states (accumulate contributions)
706        for (path, file_state) in &state.file_states {
707            if let Some(existing) = merged_file_states.get(path) {
708                merged_file_states.insert(
709                    path.clone(),
710                    FileAttributionState {
711                        content_hash: file_state.content_hash.clone(),
712                        claude_contribution: existing.claude_contribution
713                            + file_state.claude_contribution,
714                        mtime: file_state.mtime,
715                    },
716                );
717            } else {
718                merged_file_states.insert(path.clone(), file_state.clone());
719            }
720        }
721    }
722
723    // Process each staged file
724    for file in staged_files {
725        // Skip generated files (placeholder - would need is_generated_file)
726        // if is_generated_file(&file) {
727        //     excluded_generated.push(file.clone());
728        //     continue;
729        // }
730
731        let abs_path = PathBuf::from(&cwd).join(&file);
732        let file_state = merged_file_states.get(&file);
733        let baseline = merged_baselines.get(&file);
734
735        // Get the surface for this file
736        let file_surface = states
737            .first()
738            .map(|s| s.surface.clone())
739            .unwrap_or_else(get_client_surface);
740
741        let (mut claude_chars, mut human_chars) = (0u64, 0u64);
742
743        // Check if file was deleted (placeholder - would need is_file_deleted)
744        // For now, check if file exists
745        let deleted = !abs_path.exists();
746
747        if deleted {
748            // File was deleted
749            if let Some(state) = file_state {
750                claude_chars = state.claude_contribution;
751                human_chars = 0;
752            } else {
753                // Human deleted this file - use diff size estimation
754                human_chars = 100; // Minimum attribution for a deletion
755            }
756        } else {
757            // File exists - use file size as proxy for char count
758            if let Ok(stats) = std::fs::metadata(&abs_path) {
759                if file_state.is_some() {
760                    // We have tracked modifications for this file
761                    claude_chars = file_state.map(|s| s.claude_contribution).unwrap_or(0);
762                    human_chars = 0;
763                } else if baseline.is_some() {
764                    // File was modified but not tracked - human modification
765                    human_chars = stats.len() as u64;
766                } else {
767                    // New file not created by Claude
768                    human_chars = stats.len() as u64;
769                }
770            }
771        }
772
773        // Ensure non-negative values
774        claude_chars = claude_chars.max(0);
775        human_chars = human_chars.max(0);
776
777        let total = claude_chars + human_chars;
778        let percent = if total > 0 {
779            ((claude_chars as f64 / total as f64) * 100.0).round() as u32
780        } else {
781            0
782        };
783
784        files.insert(
785            file.clone(),
786            FileAttribution {
787                claude_chars,
788                human_chars,
789                percent,
790                surface: file_surface.clone(),
791            },
792        );
793
794        total_claude_chars += claude_chars;
795        total_human_chars += human_chars;
796
797        *surface_counts.entry(file_surface).or_insert(0) += claude_chars;
798    }
799
800    let total_chars = total_claude_chars + total_human_chars;
801    let claude_percent = if total_chars > 0 {
802        ((total_claude_chars as f64 / total_chars as f64) * 100.0).round() as u32
803    } else {
804        0
805    };
806
807    // Calculate surface breakdown (percentage of total content per surface)
808    let mut surface_breakdown: HashMap<String, SurfaceBreakdown> = HashMap::new();
809    for (surface, chars) in surface_counts {
810        let percent = if total_chars > 0 {
811            ((chars as f64 / total_chars as f64) * 100.0).round() as u32
812        } else {
813            0
814        };
815        surface_breakdown.insert(
816            surface,
817            SurfaceBreakdown {
818                claude_chars: chars,
819                percent,
820            },
821        );
822    }
823
824    AttributionData {
825        version: 1,
826        summary: AttributionSummary {
827            claude_percent,
828            claude_chars: total_claude_chars,
829            human_chars: total_human_chars,
830            surfaces: surfaces.into_iter().collect(),
831        },
832        files,
833        surface_breakdown,
834        excluded_generated,
835        sessions: vec![session_id],
836    }
837}
838
/// Get staged files from git.
///
/// Placeholder: no git command is run yet, so the list is always empty.
pub async fn get_staged_files() -> Vec<String> {
    // TODO: Implement with actual git command
    let staged: Vec<String> = vec![];
    staged
}
845
/// Unit tests for the synchronous helpers of this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sanitize_model_name() {
        assert_eq!(sanitize_model_name("opus-4-5-fast"), "claude-opus-4-5");
        assert_eq!(sanitize_model_name("sonnet-4"), "claude-sonnet-4");
        assert_eq!(sanitize_model_name("unknown"), "claude");
        // A versioned name must not be swallowed by the bare family name.
        assert_eq!(sanitize_model_name("opus-4-1"), "claude-opus-4-1");
        assert_eq!(sanitize_model_name("opus-4"), "claude-opus-4");
    }

    #[test]
    fn test_sanitize_surface_key() {
        assert_eq!(
            sanitize_surface_key("cli/opus-4-5-fast"),
            "cli/claude-opus-4-5"
        );
        assert_eq!(sanitize_surface_key("cli"), "cli");
        // Only the part after the last slash is treated as the model.
        assert_eq!(
            sanitize_surface_key("ide/vscode/sonnet-4"),
            "ide/vscode/claude-sonnet-4"
        );
    }

    #[test]
    fn test_compute_content_hash() {
        let hash1 = compute_content_hash("hello");
        let hash2 = compute_content_hash("hello");
        let hash3 = compute_content_hash("world");
        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        // Known SHA-256 vector, lowercase hex.
        assert_eq!(
            hash1,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    #[test]
    fn test_normalize_file_path() {
        // Test relative path stays relative
        assert_eq!(normalize_file_path("test.rs"), "test.rs");

        // Test absolute path normalization (depends on current dir)
        let abs_path = std::env::current_dir()
            .unwrap()
            .join("test.rs")
            .to_string_lossy()
            .to_string();
        let normalized = normalize_file_path(&abs_path);
        assert_eq!(normalized, "test.rs");
    }

    #[test]
    fn test_create_empty_attribution_state() {
        let state = create_empty_attribution_state();
        assert!(state.file_states.is_empty());
        assert!(state.session_baselines.is_empty());
        assert!(state.starting_head_sha.is_none());
        assert_eq!(state.prompt_count, 0);
    }

    #[test]
    fn test_track_file_creation() {
        let state = create_empty_attribution_state();
        let state = track_file_creation(state, "test.rs", "fn main() {}", Some(42));

        let tracked = state
            .file_states
            .get("test.rs")
            .expect("created file should be tracked");
        // The whole content of a new file is attributed to Claude.
        assert_eq!(tracked.claude_contribution, 12);
        assert_eq!(tracked.mtime, 42);
        assert_eq!(tracked.content_hash, compute_content_hash("fn main() {}"));
    }

    #[test]
    fn test_track_file_modification() {
        let state = create_empty_attribution_state();
        let state = track_file_modification(state, "test.rs", "", "fn main() {}", false, None);
        assert!(state.file_states.contains_key("test.rs"));
        assert_eq!(state.file_states["test.rs"].claude_contribution, 12);

        // Inserting one byte in the middle adds exactly one to the total.
        let state = track_file_modification(
            state,
            "test.rs",
            "fn main() {}",
            "fn main() { }",
            false,
            None,
        );
        assert_eq!(state.file_states["test.rs"].claude_contribution, 13);
        assert_eq!(
            state.file_states["test.rs"].content_hash,
            compute_content_hash("fn main() { }")
        );
    }

    #[test]
    fn test_same_length_replacement_is_counted() {
        let state = create_empty_attribution_state();
        let state = track_file_modification(state, "keys.rs", "Esc", "esc", false, None);
        assert_eq!(state.file_states["keys.rs"].claude_contribution, 1);
    }

    #[test]
    fn test_track_file_deletion() {
        let state = create_empty_attribution_state();
        let state = track_file_creation(state, "gone.rs", "abc", None);
        let state = track_file_deletion(state, "gone.rs", "abc");

        let tracked = &state.file_states["gone.rs"];
        assert!(tracked.content_hash.is_empty());
        assert_eq!(tracked.claude_contribution, 6);
    }

    #[test]
    fn test_snapshot_round_trip() {
        assert!(restore_attribution_state_from_snapshots(&[])
            .file_states
            .is_empty());

        let mut state = create_empty_attribution_state();
        state.prompt_count = 3;
        let state = track_file_creation(state, "test.rs", "fn main() {}", None);

        let snapshot = state_to_snapshot_message(&state, "message-1");
        assert_eq!(snapshot.message_type, "attribution-snapshot");
        assert_eq!(snapshot.message_id, "message-1");

        let restored = restore_attribution_state_from_snapshots(&[snapshot]);
        assert_eq!(restored.prompt_count, 3);
        assert_eq!(restored.surface, state.surface);
        assert_eq!(restored.file_states["test.rs"].claude_contribution, 12);
    }
}