git_iris/
token_optimizer.rs

1use crate::context::CommitContext;
2use crate::log_debug;
3use tiktoken_rs::cl100k_base;
4
5pub struct TokenOptimizer {
6    encoder: tiktoken_rs::CoreBPE,
7    max_tokens: usize,
8}
9
10impl TokenOptimizer {
11    #[allow(clippy::unwrap_used)] // todo: handle unwrap
12    pub fn new(max_tokens: usize) -> Self {
13        Self {
14            encoder: cl100k_base().unwrap(),
15            max_tokens,
16        }
17    }
18
19    pub fn optimize_context(&self, context: &mut CommitContext) {
20        let mut remaining_tokens = self.max_tokens;
21        let mut total_tokens = 0;
22
23        // Step 1: Allocate tokens for the diffs (highest priority)
24        for file in &mut context.staged_files {
25            let diff_tokens = self.count_tokens(&file.diff);
26            if total_tokens + diff_tokens > self.max_tokens {
27                log_debug!(
28                    "Truncating diff for {} from {} tokens to {} tokens",
29                    file.path,
30                    diff_tokens,
31                    remaining_tokens
32                );
33                file.diff = self.truncate_string(&file.diff, remaining_tokens);
34                total_tokens += remaining_tokens;
35                remaining_tokens = 0;
36            } else {
37                total_tokens += diff_tokens;
38                remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
39            }
40
41            if remaining_tokens == 0 {
42                // If we exhaust the tokens in step 1, clear commits and contents
43                log_debug!(
44                    "Token budget exhausted after diffs (total: {}), clearing commits and contents",
45                    total_tokens
46                );
47                Self::clear_commits_and_contents(context);
48                return;
49            }
50        }
51
52        // Step 2: Allocate remaining tokens for recent commits (medium priority)
53        for commit in &mut context.recent_commits {
54            let commit_tokens = self.count_tokens(&commit.message);
55            if total_tokens + commit_tokens > self.max_tokens {
56                log_debug!(
57                    "Truncating commit message from {} tokens to {} tokens",
58                    commit_tokens,
59                    remaining_tokens
60                );
61                commit.message = self.truncate_string(&commit.message, remaining_tokens);
62                total_tokens += remaining_tokens;
63                remaining_tokens = 0;
64            } else {
65                total_tokens += commit_tokens;
66                remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
67            }
68
69            if remaining_tokens == 0 {
70                // If we exhaust the tokens in step 2, clear contents
71                log_debug!(
72                    "Token budget exhausted after commits (total: {}), clearing contents",
73                    total_tokens
74                );
75                Self::clear_contents(context);
76                return;
77            }
78        }
79
80        // Step 3: Allocate any leftover tokens for full file contents (lowest priority)
81        for file in &mut context.staged_files {
82            if let Some(content) = &mut file.content {
83                let content_tokens = self.count_tokens(content);
84                if total_tokens + content_tokens > self.max_tokens {
85                    log_debug!(
86                        "Truncating file content for {} from {} tokens to {} tokens",
87                        file.path,
88                        content_tokens,
89                        remaining_tokens
90                    );
91                    *content = self.truncate_string(content, remaining_tokens);
92                    total_tokens += remaining_tokens;
93                    remaining_tokens = 0;
94                } else {
95                    total_tokens += content_tokens;
96                    remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
97                }
98
99                if remaining_tokens == 0 {
100                    log_debug!(
101                        "Token budget exhausted after file contents (total: {})",
102                        total_tokens
103                    );
104                    return; // Exit early if we've exhausted the token budget
105                }
106            }
107        }
108
109        log_debug!("Final token count after optimization: {}", total_tokens);
110    }
111
112    // Truncate a string to fit within the specified token limit
113    #[allow(clippy::unwrap_used)] // todo: handle unwrap
114    pub fn truncate_string(&self, s: &str, max_tokens: usize) -> String {
115        let tokens = self.encoder.encode_ordinary(s);
116
117        if tokens.len() <= max_tokens {
118            return s.to_string();
119        }
120
121        let truncation_limit = max_tokens.saturating_sub(1); // Reserve space for the ellipsis
122        let mut truncated_tokens = tokens[..truncation_limit].to_vec();
123        truncated_tokens.push(self.encoder.encode_ordinary("…")[0]);
124
125        self.encoder.decode(truncated_tokens).unwrap()
126    }
127
128    // Clear all recent commits and full file contents
129    fn clear_commits_and_contents(context: &mut CommitContext) {
130        Self::clear_commits(context);
131        Self::clear_contents(context);
132    }
133
134    // Clear all recent commits
135    fn clear_commits(context: &mut CommitContext) {
136        for commit in &mut context.recent_commits {
137            commit.message.clear();
138        }
139    }
140
141    // Clear all full file contents
142    fn clear_contents(context: &mut CommitContext) {
143        for file in &mut context.staged_files {
144            file.content = None;
145        }
146    }
147
148    // Count the number of tokens in a string
149    pub fn count_tokens(&self, s: &str) -> usize {
150        let tokens = self.encoder.encode_ordinary(s);
151        tokens.len()
152    }
153}