git_iris/
token_optimizer.rs1use crate::context::CommitContext;
2use crate::log_debug;
3use tiktoken_rs::cl100k_base;
4
5pub struct TokenOptimizer {
6 encoder: tiktoken_rs::CoreBPE,
7 max_tokens: usize,
8}
9
10impl TokenOptimizer {
11 #[allow(clippy::unwrap_used)] pub fn new(max_tokens: usize) -> Self {
13 Self {
14 encoder: cl100k_base().unwrap(),
15 max_tokens,
16 }
17 }
18
19 pub fn optimize_context(&self, context: &mut CommitContext) {
20 let mut remaining_tokens = self.max_tokens;
21 let mut total_tokens = 0;
22
23 for file in &mut context.staged_files {
25 let diff_tokens = self.count_tokens(&file.diff);
26 if total_tokens + diff_tokens > self.max_tokens {
27 log_debug!(
28 "Truncating diff for {} from {} tokens to {} tokens",
29 file.path,
30 diff_tokens,
31 remaining_tokens
32 );
33 file.diff = self.truncate_string(&file.diff, remaining_tokens);
34 total_tokens += remaining_tokens;
35 remaining_tokens = 0;
36 } else {
37 total_tokens += diff_tokens;
38 remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
39 }
40
41 if remaining_tokens == 0 {
42 log_debug!(
44 "Token budget exhausted after diffs (total: {}), clearing commits and contents",
45 total_tokens
46 );
47 Self::clear_commits_and_contents(context);
48 return;
49 }
50 }
51
52 for commit in &mut context.recent_commits {
54 let commit_tokens = self.count_tokens(&commit.message);
55 if total_tokens + commit_tokens > self.max_tokens {
56 log_debug!(
57 "Truncating commit message from {} tokens to {} tokens",
58 commit_tokens,
59 remaining_tokens
60 );
61 commit.message = self.truncate_string(&commit.message, remaining_tokens);
62 total_tokens += remaining_tokens;
63 remaining_tokens = 0;
64 } else {
65 total_tokens += commit_tokens;
66 remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
67 }
68
69 if remaining_tokens == 0 {
70 log_debug!(
72 "Token budget exhausted after commits (total: {}), clearing contents",
73 total_tokens
74 );
75 Self::clear_contents(context);
76 return;
77 }
78 }
79
80 for file in &mut context.staged_files {
82 if let Some(content) = &mut file.content {
83 let content_tokens = self.count_tokens(content);
84 if total_tokens + content_tokens > self.max_tokens {
85 log_debug!(
86 "Truncating file content for {} from {} tokens to {} tokens",
87 file.path,
88 content_tokens,
89 remaining_tokens
90 );
91 *content = self.truncate_string(content, remaining_tokens);
92 total_tokens += remaining_tokens;
93 remaining_tokens = 0;
94 } else {
95 total_tokens += content_tokens;
96 remaining_tokens = self.max_tokens.saturating_sub(total_tokens);
97 }
98
99 if remaining_tokens == 0 {
100 log_debug!(
101 "Token budget exhausted after file contents (total: {})",
102 total_tokens
103 );
104 return; }
106 }
107 }
108
109 log_debug!("Final token count after optimization: {}", total_tokens);
110 }
111
112 #[allow(clippy::unwrap_used)] pub fn truncate_string(&self, s: &str, max_tokens: usize) -> String {
115 let tokens = self.encoder.encode_ordinary(s);
116
117 if tokens.len() <= max_tokens {
118 return s.to_string();
119 }
120
121 let truncation_limit = max_tokens.saturating_sub(1); let mut truncated_tokens = tokens[..truncation_limit].to_vec();
123 truncated_tokens.push(self.encoder.encode_ordinary("…")[0]);
124
125 self.encoder.decode(truncated_tokens).unwrap()
126 }
127
128 fn clear_commits_and_contents(context: &mut CommitContext) {
130 Self::clear_commits(context);
131 Self::clear_contents(context);
132 }
133
134 fn clear_commits(context: &mut CommitContext) {
136 for commit in &mut context.recent_commits {
137 commit.message.clear();
138 }
139 }
140
141 fn clear_contents(context: &mut CommitContext) {
143 for file in &mut context.staged_files {
144 file.content = None;
145 }
146 }
147
148 pub fn count_tokens(&self, s: &str) -> usize {
150 let tokens = self.encoder.encode_ordinary(s);
151 tokens.len()
152 }
153}