1use chrono::{DateTime, Utc};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use std::path::{Path, PathBuf};
12use std::sync::OnceLock;
13
14use crate::prompt::AssembledPrompt;
15
16static SECTION_PATTERN: OnceLock<Regex> = OnceLock::new();
18static TAG_PATTERN: OnceLock<Regex> = OnceLock::new();
19
20fn get_section_pattern() -> &'static Regex {
21 SECTION_PATTERN.get_or_init(|| Regex::new(r"\[([^\]]+)\]").unwrap())
22}
23
24fn get_tag_pattern() -> &'static Regex {
25 TAG_PATTERN.get_or_init(|| Regex::new(r"<([a-zA-Z_][a-zA-Z0-9_]*)>").unwrap())
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct DumpEntry {
31 pub timestamp: DateTime<Utc>,
33 pub profile: String,
35 pub prompt: String,
37 pub cached_sections: usize,
39 pub dynamic_sections: usize,
41 pub cached_tokens: usize,
43 pub dynamic_tokens: usize,
45 pub total_tokens: usize,
47 pub cache_efficiency: f64,
49 pub session_id: Option<String>,
51 pub conversation_id: Option<String>,
53}
54
55impl DumpEntry {
56 pub fn from_prompt(prompt: &AssembledPrompt, session_id: Option<String>) -> Self {
58 Self {
59 timestamp: Utc::now(),
60 profile: prompt.profile.to_string(),
61 prompt: prompt.prompt.clone(),
62 cached_sections: prompt.cached_sections,
63 dynamic_sections: prompt.dynamic_sections,
64 cached_tokens: prompt.cached_tokens,
65 dynamic_tokens: prompt.dynamic_tokens,
66 total_tokens: prompt.total_tokens,
67 cache_efficiency: prompt.cache_efficiency(),
68 session_id,
69 conversation_id: None,
70 }
71 }
72
73 pub fn with_conversation(mut self, conversation_id: String) -> Self {
75 self.conversation_id = Some(conversation_id);
76 self
77 }
78}
79
80pub struct PromptDumper {
82 dump_path: Option<PathBuf>,
84 dump_enabled: bool,
86 print_enabled: bool,
88 session_id: Option<String>,
90 entries: Vec<DumpEntry>,
92 buffer_size: usize,
94}
95
96impl PromptDumper {
97 pub fn new() -> Self {
99 Self {
100 dump_path: None,
101 dump_enabled: false,
102 print_enabled: false,
103 session_id: None,
104 entries: Vec::new(),
105 buffer_size: 100,
106 }
107 }
108
109 pub fn enable_file_dump<P: Into<PathBuf>>(mut self, path: P) -> Self {
111 self.dump_path = Some(path.into());
112 self.dump_enabled = true;
113 self
114 }
115
116 pub fn enable_print(mut self) -> Self {
118 self.print_enabled = true;
119 self
120 }
121
122 pub fn with_session(mut self, session_id: String) -> Self {
124 self.session_id = Some(session_id);
125 self
126 }
127
128 pub fn with_buffer_size(mut self, size: usize) -> Self {
130 self.buffer_size = size;
131 self
132 }
133
134 pub fn dump(&mut self, prompt: &AssembledPrompt) {
136 let entry = DumpEntry::from_prompt(prompt, self.session_id.clone());
137
138 if self.print_enabled {
140 self.print_entry(&entry);
141 }
142
143 self.entries.push(entry);
145
146 if self.entries.len() >= self.buffer_size {
148 self.flush();
149 }
150 }
151
152 pub fn dump_with_conversation(&mut self, prompt: &AssembledPrompt, conversation_id: String) {
154 let entry = DumpEntry::from_prompt(prompt, self.session_id.clone())
155 .with_conversation(conversation_id);
156
157 if self.print_enabled {
158 self.print_entry(&entry);
159 }
160
161 self.entries.push(entry);
162
163 if self.entries.len() >= self.buffer_size {
164 self.flush();
165 }
166 }
167
168 fn print_entry(&self, entry: &DumpEntry) {
170 println!("=== Prompt Dump ===");
171 println!("Timestamp: {}", entry.timestamp);
172 println!("Profile: {}", entry.profile);
173 println!(
174 "Sections: {} cached, {} dynamic",
175 entry.cached_sections, entry.dynamic_sections
176 );
177 println!(
178 "Tokens: {} cached, {} dynamic, {} total",
179 entry.cached_tokens, entry.dynamic_tokens, entry.total_tokens
180 );
181 println!("Cache efficiency: {:.1}%", entry.cache_efficiency);
182 println!("--- Prompt Content ---");
183
184 if entry.prompt.len() > 2000 {
186 println!(
187 "{}... (truncated, {} chars total)",
188 &entry.prompt[..2000],
189 entry.prompt.len()
190 );
191 } else {
192 println!("{}", entry.prompt);
193 }
194
195 println!("=== End Dump ===");
196 }
197
198 pub fn flush(&mut self) {
200 if !self.dump_enabled || self.dump_path.is_none() || self.entries.is_empty() {
201 return;
202 }
203
204 let path = self.dump_path.as_ref().unwrap();
205
206 if let Some(parent) = path.parent() {
208 if !parent.exists() {
209 if let Err(e) = std::fs::create_dir_all(parent) {
210 log::warn!("Failed to create dump directory: {}", e);
211 return;
212 }
213 }
214 }
215
216 match std::fs::OpenOptions::new()
218 .create(true)
219 .append(true)
220 .open(path)
221 {
222 Ok(mut file) => {
223 use std::io::Write;
224 for entry in &self.entries {
225 match serde_json::to_string(entry) {
226 Ok(json) => {
227 if let Err(e) = writeln!(file, "{}", json) {
228 log::warn!("Failed to write dump entry: {}", e);
229 }
230 }
231 Err(e) => log::warn!("Failed to serialize dump entry: {}", e),
232 }
233 }
234 }
235 Err(e) => log::warn!("Failed to open dump file {}: {}", path.display(), e),
236 }
237
238 self.entries.clear();
239 }
240
241 pub fn entries(&self) -> &[DumpEntry] {
243 &self.entries
244 }
245
246 pub fn clear(&mut self) {
248 self.entries.clear();
249 }
250
251 pub fn analyze_prompt(prompt: &str) -> PromptAnalysis {
253 let mut analysis = PromptAnalysis::default();
254
255 let section_pattern = get_section_pattern();
257 for cap in section_pattern.captures_iter(prompt) {
258 analysis.sections.push(cap[1].to_string());
259 }
260
261 let tag_pattern = get_tag_pattern();
263 for cap in tag_pattern.captures_iter(prompt) {
264 let tag = cap[1].to_string();
265 analysis.xml_tags.push(tag.clone());
266 analysis
267 .xml_tag_counts
268 .entry(tag)
269 .and_modify(|c| *c += 1)
270 .or_insert(1);
271 }
272
273 analysis.has_cache_boundary = prompt.contains(crate::prompt::CACHE_BOUNDARY);
275
276 analysis.estimated_tokens = crate::prompt::cache::estimate_tokens(prompt);
278
279 analysis.char_count = prompt.len();
281
282 analysis.line_count = prompt.lines().count();
284
285 analysis
286 }
287}
288
289impl Default for PromptDumper {
290 fn default() -> Self {
291 Self::new()
292 }
293}
294
295#[derive(Debug, Clone, Default, Serialize, Deserialize)]
297pub struct PromptAnalysis {
298 pub sections: Vec<String>,
300 pub xml_tags: Vec<String>,
302 pub xml_tag_counts: std::collections::HashMap<String, usize>,
304 pub has_cache_boundary: bool,
306 pub estimated_tokens: usize,
308 pub char_count: usize,
310 pub line_count: usize,
312}
313
314impl PromptAnalysis {
315 pub fn print_summary(&self) {
317 println!("Prompt Analysis Summary:");
318 println!(" Sections: {:?}", self.sections);
319 println!(
320 " XML tags: {} unique, {:?} counts",
321 self.xml_tags.len(),
322 self.xml_tag_counts
323 );
324 println!(" Cache boundary: {}", self.has_cache_boundary);
325 println!(" Tokens estimate: {}", self.estimated_tokens);
326 println!(" Characters: {}", self.char_count);
327 println!(" Lines: {}", self.line_count);
328 }
329}
330
331pub fn read_dump_file<P: AsRef<Path>>(path: P) -> Vec<DumpEntry> {
333 let path = path.as_ref();
334 if !path.exists() {
335 return Vec::new();
336 }
337
338 let content = std::fs::read_to_string(path).unwrap_or_default();
339 content
340 .lines()
341 .filter_map(|line| serde_json::from_str::<DumpEntry>(line).ok())
342 .collect()
343}
344
345pub fn analyze_dump_file<P: AsRef<Path>>(path: P) -> DumpFileAnalysis {
347 let entries = read_dump_file(path);
348
349 let mut analysis = DumpFileAnalysis::default();
350 analysis.total_entries = entries.len();
351
352 for entry in &entries {
353 analysis.total_tokens += entry.total_tokens;
354 analysis.avg_tokens += entry.total_tokens;
355 analysis
356 .profile_counts
357 .entry(entry.profile.clone())
358 .and_modify(|c| *c += 1)
359 .or_insert(1);
360
361 if entry.cache_efficiency > analysis.max_cache_efficiency {
362 analysis.max_cache_efficiency = entry.cache_efficiency;
363 }
364 if entry.cache_efficiency < analysis.min_cache_efficiency
365 || analysis.min_cache_efficiency == 0.0
366 {
367 analysis.min_cache_efficiency = entry.cache_efficiency;
368 }
369 }
370
371 if analysis.total_entries > 0 {
372 analysis.avg_tokens /= analysis.total_entries;
373 analysis.avg_cache_efficiency =
374 entries.iter().map(|e| e.cache_efficiency).sum::<f64>() / analysis.total_entries as f64;
375 }
376
377 analysis
378}
379
380#[derive(Debug, Clone, Default, Serialize, Deserialize)]
382pub struct DumpFileAnalysis {
383 pub total_entries: usize,
385 pub total_tokens: usize,
387 pub avg_tokens: usize,
389 pub profile_counts: std::collections::HashMap<String, usize>,
391 pub max_cache_efficiency: f64,
393 pub min_cache_efficiency: f64,
395 pub avg_cache_efficiency: f64,
397}
398
399impl DumpFileAnalysis {
400 pub fn print_summary(&self) {
401 println!("Dump File Analysis:");
402 println!(" Total entries: {}", self.total_entries);
403 println!(" Total tokens: {}", self.total_tokens);
404 println!(" Average tokens: {}", self.avg_tokens);
405 println!(" Profile distribution: {:?}", self.profile_counts);
406 println!(
407 " Cache efficiency: min {:.1}%, max {:.1}%, avg {:.1}%",
408 self.min_cache_efficiency, self.max_cache_efficiency, self.avg_cache_efficiency
409 );
410 }
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an orchestrator rooted at the current directory that holds exactly one
    /// static section with the given name and content.
    fn single_section_orchestrator(
        name: &'static str,
        content: &'static str,
    ) -> crate::prompt::PromptOrchestrator {
        let root = std::env::current_dir().unwrap();
        let mut orchestrator = crate::prompt::PromptOrchestrator::new(root);
        orchestrator.add_section(crate::prompt::PromptSection::static_section(name, content));
        orchestrator
    }

    /// An entry built from an assembled prompt carries its profile, text and session id.
    #[test]
    fn test_dump_entry_creation() {
        let orchestrator = &mut single_section_orchestrator("test", "test content");

        let assembled = orchestrator.assemble();
        let session = Some("session-1".to_string());
        let entry = DumpEntry::from_prompt(&assembled, session.clone());

        assert_eq!(entry.profile, "default");
        assert!(entry.prompt.contains("test"));
        assert_eq!(entry.session_id, session);
    }

    /// A print-only dumper keeps the dumped entry in its buffer.
    #[test]
    fn test_dumper_basic() {
        let mut dumper = PromptDumper::new().enable_print();
        let orchestrator = &mut single_section_orchestrator("identity", "You are AI");

        let assembled = orchestrator.assemble();
        dumper.dump(&assembled);

        assert_eq!(dumper.entries().len(), 1);
    }

    /// Section markers and opening tags are detected in raw prompt text.
    #[test]
    fn test_analyze_prompt() {
        let analysis = PromptDumper::analyze_prompt(
            "[identity]\nYou are AI\n\n<context>\nSome context\n</context>",
        );

        assert!(analysis.sections.iter().any(|s| s == "identity"));
        assert!(analysis.xml_tags.iter().any(|t| t == "context"));
        assert!(!analysis.has_cache_boundary);
        assert!(analysis.estimated_tokens > 0);
    }

    /// Printing a summary must not panic.
    #[test]
    fn test_prompt_analysis_summary() {
        PromptDumper::analyze_prompt("[test]\nContent").print_summary();
    }

    /// Entries flushed to a file can be read back and aggregated.
    #[test]
    fn test_dump_file_analysis() {
        let temp_file = tempfile::NamedTempFile::new().unwrap();
        let path = temp_file.path();

        let mut dumper = PromptDumper::new()
            .enable_file_dump(path)
            .with_session("test-session".to_string());

        let orchestrator = &mut single_section_orchestrator("test", "content");
        for _ in 0..5 {
            let assembled = orchestrator.assemble();
            dumper.dump(&assembled);
        }
        dumper.flush();

        let analysis = analyze_dump_file(path);
        assert_eq!(analysis.total_entries, 5);
        assert!(analysis.avg_tokens > 0);
        analysis.print_summary();
    }
}