use crate::context::CommitContext;
use tiktoken_rs::cl100k_base;

/// Trims a `CommitContext` in place so that its combined token count,
/// as measured by the cl100k_base BPE tokenizer, fits within a fixed budget.
pub struct TokenOptimizer {
    // BPE tokenizer (cl100k_base) used for all counting and truncation.
    encoder: tiktoken_rs::CoreBPE,
    // Total token budget shared across diffs, commit messages, and contents.
    max_tokens: usize,
}

impl TokenOptimizer {
    /// Creates an optimizer that caps the total context size at `max_tokens`.
    ///
    /// # Panics
    /// Panics if the embedded cl100k_base vocabulary fails to load — a
    /// build/packaging defect, not a runtime condition.
    pub fn new(max_tokens: usize) -> Self {
        TokenOptimizer {
            encoder: cl100k_base().expect("failed to load cl100k_base tokenizer"),
            max_tokens,
        }
    }

    /// Shrinks `context` in place so its total token count fits `self.max_tokens`.
    ///
    /// The budget is spent by priority:
    /// 1. staged-file diffs (highest),
    /// 2. recent commit messages,
    /// 3. full file contents (lowest).
    ///
    /// Once the budget is exhausted, every remaining item — including later
    /// items *within the same step* — is truncated to empty or cleared, so the
    /// combined context never exceeds the cap. (Previously, items after the
    /// one that exhausted the budget were left at full size.)
    pub fn optimize_context(&self, context: &mut CommitContext) {
        let mut remaining_tokens = self.max_tokens;

        // Step 1: diffs (highest priority). Keep iterating even after the
        // budget hits zero so later diffs are emptied rather than left intact.
        for file in &mut context.staged_files {
            let diff_tokens = self.count_tokens(&file.diff);
            if diff_tokens > remaining_tokens {
                // truncate_string(_, 0) yields "" — this also empties any
                // diffs processed after the budget is exhausted.
                file.diff = self.truncate_string(&file.diff, remaining_tokens);
                remaining_tokens = 0;
            } else {
                remaining_tokens -= diff_tokens;
            }
        }

        if remaining_tokens == 0 {
            // Diffs consumed everything: lower-priority data must go entirely.
            self.clear_commits_and_contents(context);
            return;
        }

        // Step 2: recent commit messages (medium priority).
        for commit in &mut context.recent_commits {
            let commit_tokens = self.count_tokens(&commit.message);
            if commit_tokens > remaining_tokens {
                commit.message = self.truncate_string(&commit.message, remaining_tokens);
                remaining_tokens = 0;
            } else {
                remaining_tokens -= commit_tokens;
            }
        }

        if remaining_tokens == 0 {
            self.clear_contents(context);
            return;
        }

        // Step 3: full file contents (lowest priority).
        for file in &mut context.staged_files {
            if remaining_tokens == 0 {
                // Out of budget: drop the content outright, matching the
                // semantics of clear_contents.
                file.content = None;
                continue;
            }
            if let Some(content) = &mut file.content {
                let content_tokens = self.count_tokens(content);
                if content_tokens > remaining_tokens {
                    *content = self.truncate_string(content, remaining_tokens);
                    remaining_tokens = 0;
                } else {
                    remaining_tokens -= content_tokens;
                }
            }
        }
    }

    /// Truncates `s` so that it encodes to at most `max_tokens` tokens,
    /// appending "…" when truncation occurs. Returns `s` unchanged (as an
    /// owned `String`) when it already fits.
    ///
    /// A budget of 0 returns the empty string (previously it returned the
    /// ellipsis alone — one token over budget).
    pub fn truncate_string(&self, s: &str, max_tokens: usize) -> String {
        if max_tokens == 0 {
            // No budget at all: nothing fits, not even the ellipsis marker.
            return String::new();
        }

        let tokens = self.encoder.encode_ordinary(s);
        if tokens.len() <= max_tokens {
            return s.to_string();
        }

        let ellipsis = self.encoder.encode_ordinary("…")[0];
        // Reserve one token of the budget for the ellipsis marker.
        let mut keep = max_tokens - 1;
        loop {
            let mut truncated = tokens[..keep].to_vec();
            truncated.push(ellipsis);
            match self.encoder.decode(truncated) {
                Ok(decoded) => return decoded,
                // Token boundaries need not align with UTF-8 character
                // boundaries; back off one token and retry instead of
                // panicking on an unwrap (the previous behavior).
                Err(_) if keep > 0 => keep -= 1,
                Err(_) => return "…".to_string(),
            }
        }
    }

    // Clear all recent commit messages and all full file contents.
    fn clear_commits_and_contents(&self, context: &mut CommitContext) {
        self.clear_commits(context);
        self.clear_contents(context);
    }

    // Empty every recent commit message (entries are kept, text removed).
    fn clear_commits(&self, context: &mut CommitContext) {
        for commit in &mut context.recent_commits {
            commit.message.clear();
        }
    }

    // Drop the full content of every staged file.
    fn clear_contents(&self, context: &mut CommitContext) {
        for file in &mut context.staged_files {
            file.content = None;
        }
    }

    /// Returns the number of cl100k_base tokens in `s`.
    pub fn count_tokens(&self, s: &str) -> usize {
        self.encoder.encode_ordinary(s).len()
    }
}