1use std::path::{Path, PathBuf};
9use std::sync::OnceLock;
10use serde::{Deserialize, Serialize};
11use chrono::{DateTime, Utc};
12use regex::Regex;
13
14use crate::prompt::AssembledPrompt;
15
/// Holds the compiled `[section]` regex; populated on the first call to
/// `get_section_pattern` and reused afterwards.
static SECTION_PATTERN: OnceLock<Regex> = OnceLock::new();
/// Holds the compiled `<tag>` regex; populated on the first call to
/// `get_tag_pattern` and reused afterwards.
static TAG_PATTERN: OnceLock<Regex> = OnceLock::new();
19
20fn get_section_pattern() -> &'static Regex {
21 SECTION_PATTERN.get_or_init(|| Regex::new(r"\[([^\]]+)\]").unwrap())
22}
23
24fn get_tag_pattern() -> &'static Regex {
25 TAG_PATTERN.get_or_init(|| Regex::new(r"<([a-zA-Z_][a-zA-Z0-9_]*)>").unwrap())
26}
27
/// One recorded prompt together with the statistics captured when it was
/// dumped. Serialized as a single JSON object per line in the dump file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DumpEntry {
    /// UTC time at which the entry was created.
    pub timestamp: DateTime<Utc>,
    /// String form of the assembled prompt's `profile`.
    pub profile: String,
    /// Full text of the assembled prompt.
    pub prompt: String,
    /// Copied from the assembled prompt's `cached_sections`.
    pub cached_sections: usize,
    /// Copied from the assembled prompt's `dynamic_sections`.
    pub dynamic_sections: usize,
    /// Copied from the assembled prompt's `cached_tokens`.
    pub cached_tokens: usize,
    /// Copied from the assembled prompt's `dynamic_tokens`.
    pub dynamic_tokens: usize,
    /// Copied from the assembled prompt's `total_tokens`.
    pub total_tokens: usize,
    /// Value returned by the assembled prompt's `cache_efficiency()`.
    /// NOTE(review): printed with a `%` suffix elsewhere in this file, so this
    /// is presumably a percentage — confirm against `AssembledPrompt`.
    pub cache_efficiency: f64,
    /// Caller-supplied session identifier, if any.
    pub session_id: Option<String>,
    /// Conversation identifier; `None` unless set via `with_conversation`.
    pub conversation_id: Option<String>,
}
54
55impl DumpEntry {
56 pub fn from_prompt(prompt: &AssembledPrompt, session_id: Option<String>) -> Self {
58 Self {
59 timestamp: Utc::now(),
60 profile: prompt.profile.to_string(),
61 prompt: prompt.prompt.clone(),
62 cached_sections: prompt.cached_sections,
63 dynamic_sections: prompt.dynamic_sections,
64 cached_tokens: prompt.cached_tokens,
65 dynamic_tokens: prompt.dynamic_tokens,
66 total_tokens: prompt.total_tokens,
67 cache_efficiency: prompt.cache_efficiency(),
68 session_id,
69 conversation_id: None,
70 }
71 }
72
73 pub fn with_conversation(mut self, conversation_id: String) -> Self {
75 self.conversation_id = Some(conversation_id);
76 self
77 }
78}
79
/// Collects `DumpEntry` records in memory and, when file dumping is enabled,
/// appends them to a file as one JSON object per line.
pub struct PromptDumper {
    /// Destination file for flushed entries; set by `enable_file_dump`.
    dump_path: Option<PathBuf>,
    /// Whether `flush` writes buffered entries to `dump_path`.
    dump_enabled: bool,
    /// Whether every dumped entry is also printed to stdout.
    print_enabled: bool,
    /// Session identifier attached to every entry this dumper creates.
    session_id: Option<String>,
    /// Entries recorded since the last flush or clear.
    entries: Vec<DumpEntry>,
    /// Number of buffered entries at which `dump` triggers a flush.
    buffer_size: usize,
}
95
96impl PromptDumper {
97 pub fn new() -> Self {
99 Self {
100 dump_path: None,
101 dump_enabled: false,
102 print_enabled: false,
103 session_id: None,
104 entries: Vec::new(),
105 buffer_size: 100,
106 }
107 }
108
109 pub fn enable_file_dump<P: Into<PathBuf>>(mut self, path: P) -> Self {
111 self.dump_path = Some(path.into());
112 self.dump_enabled = true;
113 self
114 }
115
116 pub fn enable_print(mut self) -> Self {
118 self.print_enabled = true;
119 self
120 }
121
122 pub fn with_session(mut self, session_id: String) -> Self {
124 self.session_id = Some(session_id);
125 self
126 }
127
128 pub fn with_buffer_size(mut self, size: usize) -> Self {
130 self.buffer_size = size;
131 self
132 }
133
134 pub fn dump(&mut self, prompt: &AssembledPrompt) {
136 let entry = DumpEntry::from_prompt(prompt, self.session_id.clone());
137
138 if self.print_enabled {
140 self.print_entry(&entry);
141 }
142
143 self.entries.push(entry);
145
146 if self.entries.len() >= self.buffer_size {
148 self.flush();
149 }
150 }
151
152 pub fn dump_with_conversation(&mut self, prompt: &AssembledPrompt, conversation_id: String) {
154 let entry = DumpEntry::from_prompt(prompt, self.session_id.clone())
155 .with_conversation(conversation_id);
156
157 if self.print_enabled {
158 self.print_entry(&entry);
159 }
160
161 self.entries.push(entry);
162
163 if self.entries.len() >= self.buffer_size {
164 self.flush();
165 }
166 }
167
168 fn print_entry(&self, entry: &DumpEntry) {
170 println!("=== Prompt Dump ===");
171 println!("Timestamp: {}", entry.timestamp);
172 println!("Profile: {}", entry.profile);
173 println!("Sections: {} cached, {} dynamic", entry.cached_sections, entry.dynamic_sections);
174 println!("Tokens: {} cached, {} dynamic, {} total",
175 entry.cached_tokens, entry.dynamic_tokens, entry.total_tokens);
176 println!("Cache efficiency: {:.1}%", entry.cache_efficiency);
177 println!("--- Prompt Content ---");
178
179 if entry.prompt.len() > 2000 {
181 println!("{}... (truncated, {} chars total)",
182 &entry.prompt[..2000], entry.prompt.len());
183 } else {
184 println!("{}", entry.prompt);
185 }
186
187 println!("=== End Dump ===");
188 }
189
190 pub fn flush(&mut self) {
192 if !self.dump_enabled || self.dump_path.is_none() || self.entries.is_empty() {
193 return;
194 }
195
196 let path = self.dump_path.as_ref().unwrap();
197
198 if let Some(parent) = path.parent() {
200 if !parent.exists() {
201 if let Err(e) = std::fs::create_dir_all(parent) {
202 log::warn!("Failed to create dump directory: {}", e);
203 return;
204 }
205 }
206 }
207
208 match std::fs::OpenOptions::new()
210 .create(true)
211 .append(true)
212 .open(path)
213 {
214 Ok(mut file) => {
215 use std::io::Write;
216 for entry in &self.entries {
217 match serde_json::to_string(entry) {
218 Ok(json) => {
219 if let Err(e) = writeln!(file, "{}", json) {
220 log::warn!("Failed to write dump entry: {}", e);
221 }
222 }
223 Err(e) => log::warn!("Failed to serialize dump entry: {}", e),
224 }
225 }
226 }
227 Err(e) => log::warn!("Failed to open dump file {}: {}", path.display(), e),
228 }
229
230 self.entries.clear();
231 }
232
233 pub fn entries(&self) -> &[DumpEntry] {
235 &self.entries
236 }
237
238 pub fn clear(&mut self) {
240 self.entries.clear();
241 }
242
243 pub fn analyze_prompt(prompt: &str) -> PromptAnalysis {
245 let mut analysis = PromptAnalysis::default();
246
247 let section_pattern = get_section_pattern();
249 for cap in section_pattern.captures_iter(prompt) {
250 analysis.sections.push(cap[1].to_string());
251 }
252
253 let tag_pattern = get_tag_pattern();
255 for cap in tag_pattern.captures_iter(prompt) {
256 let tag = cap[1].to_string();
257 analysis.xml_tags.push(tag.clone());
258 analysis.xml_tag_counts.entry(tag).and_modify(|c| *c += 1).or_insert(1);
259 }
260
261 analysis.has_cache_boundary = prompt.contains(crate::prompt::CACHE_BOUNDARY);
263
264 analysis.estimated_tokens = crate::prompt::cache::estimate_tokens(prompt);
266
267 analysis.char_count = prompt.len();
269
270 analysis.line_count = prompt.lines().count();
272
273 analysis
274 }
275}
276
277impl Default for PromptDumper {
278 fn default() -> Self {
279 Self::new()
280 }
281}
282
/// Structural summary of a prompt's text, as produced by
/// `PromptDumper::analyze_prompt`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PromptAnalysis {
    /// Text found between `[` and `]` for every section marker, in order of
    /// appearance.
    pub sections: Vec<String>,
    /// Name of every `<name>` opening tag, in order of appearance; a tag that
    /// occurs several times is listed several times.
    pub xml_tags: Vec<String>,
    /// Number of occurrences of each tag name.
    pub xml_tag_counts: std::collections::HashMap<String, usize>,
    /// Whether the prompt contains `crate::prompt::CACHE_BOUNDARY`.
    pub has_cache_boundary: bool,
    /// Result of `crate::prompt::cache::estimate_tokens` for the prompt.
    pub estimated_tokens: usize,
    /// Length of the prompt text as measured by `PromptDumper::analyze_prompt`.
    pub char_count: usize,
    /// Number of lines in the prompt, as counted by `str::lines`.
    pub line_count: usize,
}
301
302impl PromptAnalysis {
303 pub fn print_summary(&self) {
305 println!("Prompt Analysis Summary:");
306 println!(" Sections: {:?}", self.sections);
307 println!(" XML tags: {} unique, {:?} counts", self.xml_tags.len(), self.xml_tag_counts);
308 println!(" Cache boundary: {}", self.has_cache_boundary);
309 println!(" Tokens estimate: {}", self.estimated_tokens);
310 println!(" Characters: {}", self.char_count);
311 println!(" Lines: {}", self.line_count);
312 }
313}
314
315pub fn read_dump_file<P: AsRef<Path>>(path: P) -> Vec<DumpEntry> {
317 let path = path.as_ref();
318 if !path.exists() {
319 return Vec::new();
320 }
321
322 let content = std::fs::read_to_string(path).unwrap_or_default();
323 content.lines()
324 .filter_map(|line| serde_json::from_str::<DumpEntry>(line).ok())
325 .collect()
326}
327
328pub fn analyze_dump_file<P: AsRef<Path>>(path: P) -> DumpFileAnalysis {
330 let entries = read_dump_file(path);
331
332 let mut analysis = DumpFileAnalysis::default();
333 analysis.total_entries = entries.len();
334
335 for entry in &entries {
336 analysis.total_tokens += entry.total_tokens;
337 analysis.avg_tokens += entry.total_tokens;
338 analysis.profile_counts.entry(entry.profile.clone()).and_modify(|c| *c += 1).or_insert(1);
339
340 if entry.cache_efficiency > analysis.max_cache_efficiency {
341 analysis.max_cache_efficiency = entry.cache_efficiency;
342 }
343 if entry.cache_efficiency < analysis.min_cache_efficiency || analysis.min_cache_efficiency == 0.0 {
344 analysis.min_cache_efficiency = entry.cache_efficiency;
345 }
346 }
347
348 if analysis.total_entries > 0 {
349 analysis.avg_tokens /= analysis.total_entries;
350 analysis.avg_cache_efficiency = entries.iter().map(|e| e.cache_efficiency).sum::<f64>() / analysis.total_entries as f64;
351 }
352
353 analysis
354}
355
/// Aggregate statistics over the entries of a dump file, as produced by
/// `analyze_dump_file`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DumpFileAnalysis {
    /// Number of entries read from the file.
    pub total_entries: usize,
    /// Sum of `total_tokens` over all entries.
    pub total_tokens: usize,
    /// `total_tokens` divided by `total_entries` using integer division;
    /// 0 when there are no entries.
    pub avg_tokens: usize,
    /// Number of entries per profile name.
    pub profile_counts: std::collections::HashMap<String, usize>,
    /// Highest `cache_efficiency` among the entries; 0.0 when there are none.
    pub max_cache_efficiency: f64,
    /// Lowest `cache_efficiency` among the entries; 0.0 when there are none.
    pub min_cache_efficiency: f64,
    /// Arithmetic mean of `cache_efficiency`; 0.0 when there are no entries.
    pub avg_cache_efficiency: f64,
}
374
375impl DumpFileAnalysis {
376 pub fn print_summary(&self) {
377 println!("Dump File Analysis:");
378 println!(" Total entries: {}", self.total_entries);
379 println!(" Total tokens: {}", self.total_tokens);
380 println!(" Average tokens: {}", self.avg_tokens);
381 println!(" Profile distribution: {:?}", self.profile_counts);
382 println!(" Cache efficiency: min {:.1}%, max {:.1}%, avg {:.1}%",
383 self.min_cache_efficiency, self.max_cache_efficiency, self.avg_cache_efficiency);
384 }
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    /// An entry built from an assembled prompt carries the profile name, the
    /// prompt text and the supplied session id.
    #[test]
    fn test_dump_entry_creation() {
        let cwd = std::env::current_dir().unwrap();
        let mut orchestrator = crate::prompt::PromptOrchestrator::new(cwd);
        let section = crate::prompt::PromptSection::static_section("test", "test content");
        orchestrator.add_section(section);

        let assembled = orchestrator.assemble();
        let session = Some("session-1".to_string());
        let entry = DumpEntry::from_prompt(&assembled, session.clone());

        assert_eq!(entry.profile, "default");
        assert!(entry.prompt.contains("test"));
        assert_eq!(entry.session_id, session);
    }

    /// Dumping a single prompt with printing enabled leaves exactly one entry
    /// in the buffer.
    #[test]
    fn test_dumper_basic() {
        let cwd = std::env::current_dir().unwrap();
        let mut orchestrator = crate::prompt::PromptOrchestrator::new(cwd);
        let section = crate::prompt::PromptSection::static_section("identity", "You are AI");
        orchestrator.add_section(section);

        let mut dumper = PromptDumper::new().enable_print();
        let assembled = orchestrator.assemble();
        dumper.dump(&assembled);

        assert_eq!(dumper.entries().len(), 1);
    }

    /// Section markers and opening tags are detected; no cache boundary is
    /// reported for text that lacks one.
    #[test]
    fn test_analyze_prompt() {
        let text = "[identity]\nYou are AI\n\n<context>\nSome context\n</context>";
        let report = PromptDumper::analyze_prompt(text);

        assert!(report.sections.iter().any(|name| name == "identity"));
        assert!(report.xml_tags.iter().any(|name| name == "context"));
        assert!(!report.has_cache_boundary);
        assert!(report.estimated_tokens > 0);
    }

    /// Printing the summary of an analysis must not panic.
    #[test]
    fn test_prompt_analysis_summary() {
        let text = "[test]\nContent";
        PromptDumper::analyze_prompt(text).print_summary();
    }

    /// Entries flushed to a file can be read back and aggregated.
    #[test]
    fn test_dump_file_analysis() {
        let temp_file = tempfile::NamedTempFile::new().unwrap();
        let path = temp_file.path();

        let cwd = std::env::current_dir().unwrap();
        let mut orchestrator = crate::prompt::PromptOrchestrator::new(cwd);
        let section = crate::prompt::PromptSection::static_section("test", "content");
        orchestrator.add_section(section);

        let mut dumper = PromptDumper::new()
            .enable_file_dump(path)
            .with_session("test-session".to_string());

        for _round in 0..5 {
            let assembled = orchestrator.assemble();
            dumper.dump(&assembled);
        }
        dumper.flush();

        let report = analyze_dump_file(path);
        assert_eq!(report.total_entries, 5);
        assert!(report.avg_tokens > 0);
        report.print_summary();
    }
}