use chrono::{DateTime, Utc};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use crate::prompt::AssembledPrompt;
/// Lazily compiled regex for bracketed section markers such as `[name]`.
/// Populated on first use by `get_section_pattern`.
static SECTION_PATTERN: OnceLock<Regex> = OnceLock::new();
/// Lazily compiled regex for opening tags such as `<name>`.
/// Populated on first use by `get_tag_pattern`.
static TAG_PATTERN: OnceLock<Regex> = OnceLock::new();
/// Returns the shared regex that captures the text between `[` and `]`.
///
/// The pattern is compiled the first time this is called and cached in
/// `SECTION_PATTERN` for every later call.
fn get_section_pattern() -> &'static Regex {
    // The pattern is a fixed literal, so compilation failing would be a programming error.
    let compile = || Regex::new(r"\[([^\]]+)\]").unwrap();
    SECTION_PATTERN.get_or_init(compile)
}
/// Returns the shared regex that captures the name of an opening tag `<name>`.
///
/// A name must start with a letter or underscore and continue with letters,
/// digits or underscores; tags with attributes and closing tags (`</name>`)
/// do not match. The compiled regex is cached in `TAG_PATTERN`.
fn get_tag_pattern() -> &'static Regex {
    // The pattern is a fixed literal, so compilation failing would be a programming error.
    fn build() -> Regex {
        Regex::new(r"<([a-zA-Z_][a-zA-Z0-9_]*)>").unwrap()
    }
    TAG_PATTERN.get_or_init(build)
}
/// One recorded prompt together with its section/token statistics.
///
/// Entries are serialized with serde; `PromptDumper::flush` writes each one as
/// a single JSON line and `read_dump_file` parses them back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DumpEntry {
/// Creation time of the entry (`from_prompt` uses `Utc::now()`).
pub timestamp: DateTime<Utc>,
/// String form of the prompt's profile (`prompt.profile.to_string()`).
pub profile: String,
/// Full prompt text, cloned from the assembled prompt.
pub prompt: String,
/// Copied from `AssembledPrompt::cached_sections`.
pub cached_sections: usize,
/// Copied from `AssembledPrompt::dynamic_sections`.
pub dynamic_sections: usize,
/// Copied from `AssembledPrompt::cached_tokens`.
pub cached_tokens: usize,
/// Copied from `AssembledPrompt::dynamic_tokens`.
pub dynamic_tokens: usize,
/// Copied from `AssembledPrompt::total_tokens`.
pub total_tokens: usize,
/// Value returned by `AssembledPrompt::cache_efficiency()`.
/// NOTE(review): printed with a `%` suffix elsewhere in this file, so
/// presumably a percentage -- confirm against `AssembledPrompt`.
pub cache_efficiency: f64,
/// Session identifier supplied by the caller, if any.
pub session_id: Option<String>,
/// Conversation identifier; `None` unless set via `with_conversation`.
pub conversation_id: Option<String>,
}
impl DumpEntry {
    /// Builds an entry from an assembled prompt, stamped with the current UTC time.
    ///
    /// Section and token counters are copied from `prompt`, the efficiency is
    /// taken from `prompt.cache_efficiency()`, and `conversation_id` starts
    /// out as `None`.
    pub fn from_prompt(prompt: &AssembledPrompt, session_id: Option<String>) -> Self {
        let timestamp = Utc::now();
        let profile = prompt.profile.to_string();
        let text = prompt.prompt.clone();
        let cache_efficiency = prompt.cache_efficiency();

        Self {
            timestamp,
            profile,
            prompt: text,
            cached_sections: prompt.cached_sections,
            dynamic_sections: prompt.dynamic_sections,
            cached_tokens: prompt.cached_tokens,
            dynamic_tokens: prompt.dynamic_tokens,
            total_tokens: prompt.total_tokens,
            cache_efficiency,
            session_id,
            conversation_id: None,
        }
    }

    /// Returns the entry with `conversation_id` set to the given value.
    pub fn with_conversation(self, conversation_id: String) -> Self {
        Self {
            conversation_id: Some(conversation_id),
            ..self
        }
    }
}
/// Collects `DumpEntry` records in memory and optionally prints them and/or
/// appends them to a file.
pub struct PromptDumper {
/// Destination file for flushed entries; set by `enable_file_dump`.
dump_path: Option<PathBuf>,
/// Whether `flush` writes to `dump_path`; turned on by `enable_file_dump`.
dump_enabled: bool,
/// Whether each recorded entry is also printed to stdout; turned on by `enable_print`.
print_enabled: bool,
/// Session identifier attached to every entry this dumper creates.
session_id: Option<String>,
/// Buffered entries that have not been flushed or cleared yet.
entries: Vec<DumpEntry>,
/// Number of buffered entries at which `dump` triggers a `flush` (`new` sets 100).
buffer_size: usize,
}
impl PromptDumper {
    /// Maximum number of characters of prompt text echoed by `print_entry`.
    const PRINT_PREVIEW_CHARS: usize = 2000;

    /// Creates a dumper with printing and file dumping disabled, no session
    /// id, an empty buffer and a buffer size of 100 entries.
    pub fn new() -> Self {
        Self {
            dump_path: None,
            dump_enabled: false,
            print_enabled: false,
            session_id: None,
            entries: Vec::new(),
            buffer_size: 100,
        }
    }

    /// Enables file dumping and sets the file that `flush` appends to.
    pub fn enable_file_dump<P: Into<PathBuf>>(mut self, path: P) -> Self {
        self.dump_path = Some(path.into());
        self.dump_enabled = true;
        self
    }

    /// Enables printing of every recorded entry to stdout.
    pub fn enable_print(mut self) -> Self {
        self.print_enabled = true;
        self
    }

    /// Sets the session id attached to every entry created afterwards.
    pub fn with_session(mut self, session_id: String) -> Self {
        self.session_id = Some(session_id);
        self
    }

    /// Sets the number of buffered entries at which a flush is triggered.
    pub fn with_buffer_size(mut self, size: usize) -> Self {
        self.buffer_size = size;
        self
    }

    /// Records `prompt` as a new entry (printing and flushing as configured).
    pub fn dump(&mut self, prompt: &AssembledPrompt) {
        let entry = DumpEntry::from_prompt(prompt, self.session_id.clone());
        self.record_entry(entry);
    }

    /// Same as [`dump`](Self::dump), but tags the entry with `conversation_id`.
    pub fn dump_with_conversation(&mut self, prompt: &AssembledPrompt, conversation_id: String) {
        let entry = DumpEntry::from_prompt(prompt, self.session_id.clone())
            .with_conversation(conversation_id);
        self.record_entry(entry);
    }

    /// Shared tail of `dump` / `dump_with_conversation`: optionally prints the
    /// entry, buffers it, and flushes once the buffer reaches `buffer_size`.
    ///
    /// When file dumping is disabled `flush` returns without touching the
    /// buffer, so entries stay available through `entries()` until `clear()`
    /// is called.
    fn record_entry(&mut self, entry: DumpEntry) {
        if self.print_enabled {
            self.print_entry(&entry);
        }
        self.entries.push(entry);
        if self.entries.len() >= self.buffer_size {
            self.flush();
        }
    }

    /// Prints a human-readable rendering of `entry` to stdout.
    ///
    /// The prompt text is cut after `PRINT_PREVIEW_CHARS` characters. The cut
    /// is made on a character boundary, so prompts containing multi-byte
    /// UTF-8 characters cannot trigger a slicing panic.
    fn print_entry(&self, entry: &DumpEntry) {
        println!("=== Prompt Dump ===");
        println!("Timestamp: {}", entry.timestamp);
        println!("Profile: {}", entry.profile);
        println!(
            "Sections: {} cached, {} dynamic",
            entry.cached_sections, entry.dynamic_sections
        );
        println!(
            "Tokens: {} cached, {} dynamic, {} total",
            entry.cached_tokens, entry.dynamic_tokens, entry.total_tokens
        );
        println!("Cache efficiency: {:.1}%", entry.cache_efficiency);
        println!("--- Prompt Content ---");
        // `nth(N)` yields the byte offset of the (N+1)-th character, i.e. the
        // end of the first N characters; `None` means the prompt is short
        // enough to print in full.
        match entry.prompt.char_indices().nth(Self::PRINT_PREVIEW_CHARS) {
            Some((cut, _)) => println!(
                "{}... (truncated, {} chars total)",
                &entry.prompt[..cut],
                entry.prompt.chars().count()
            ),
            None => println!("{}", entry.prompt),
        }
        println!("=== End Dump ===");
    }

    /// Appends all buffered entries to the dump file as JSON lines.
    ///
    /// Does nothing when file dumping is disabled, no path is configured, or
    /// the buffer is empty. Failures are logged with `log::warn!` and never
    /// propagated.
    ///
    /// Buffer handling on failure: if the parent directory cannot be created
    /// the entries are kept (the early return skips the clear); once an open
    /// has been attempted the buffer is cleared regardless of whether the
    /// open or the individual writes succeeded.
    pub fn flush(&mut self) {
        if !self.dump_enabled || self.entries.is_empty() {
            return;
        }
        let Some(path) = self.dump_path.as_deref() else {
            return;
        };

        if let Some(parent) = path.parent().filter(|dir| !dir.exists()) {
            if let Err(e) = std::fs::create_dir_all(parent) {
                log::warn!("Failed to create dump directory: {}", e);
                return;
            }
        }

        match std::fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(path)
        {
            Ok(mut file) => {
                use std::io::Write;
                for entry in &self.entries {
                    match serde_json::to_string(entry) {
                        Ok(json) => {
                            if let Err(e) = writeln!(file, "{}", json) {
                                log::warn!("Failed to write dump entry: {}", e);
                            }
                        }
                        Err(e) => log::warn!("Failed to serialize dump entry: {}", e),
                    }
                }
            }
            Err(e) => log::warn!("Failed to open dump file {}: {}", path.display(), e),
        }

        self.entries.clear();
    }

    /// Returns the entries currently held in the buffer.
    pub fn entries(&self) -> &[DumpEntry] {
        &self.entries
    }

    /// Discards all buffered entries without writing them anywhere.
    pub fn clear(&mut self) {
        self.entries.clear();
    }

    /// Extracts structural statistics from raw prompt text.
    ///
    /// Collects `[section]` marker names and `<tag>` names (with a per-name
    /// occurrence count), checks for `crate::prompt::CACHE_BOUNDARY`, and
    /// records the token estimate, the number of characters (Unicode scalar
    /// values, not bytes) and the number of lines.
    pub fn analyze_prompt(prompt: &str) -> PromptAnalysis {
        let mut analysis = PromptAnalysis::default();

        analysis.sections.extend(
            get_section_pattern()
                .captures_iter(prompt)
                .map(|cap| cap[1].to_string()),
        );

        for cap in get_tag_pattern().captures_iter(prompt) {
            let tag = cap[1].to_string();
            *analysis.xml_tag_counts.entry(tag.clone()).or_insert(0) += 1;
            // One element per occurrence, in order of appearance.
            analysis.xml_tags.push(tag);
        }

        analysis.has_cache_boundary = prompt.contains(crate::prompt::CACHE_BOUNDARY);
        analysis.estimated_tokens = crate::prompt::cache::estimate_tokens(prompt);
        // `str::len` would report bytes; the field is a character count.
        analysis.char_count = prompt.chars().count();
        analysis.line_count = prompt.lines().count();
        analysis
    }
}
impl Default for PromptDumper {
fn default() -> Self {
Self::new()
}
}
/// Structural statistics about a prompt, as produced by
/// `PromptDumper::analyze_prompt`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PromptAnalysis {
/// Names captured from `[name]` markers, in order of appearance.
pub sections: Vec<String>,
/// Names captured from `<name>` opening tags, one element per occurrence
/// (so a repeated tag appears more than once).
pub xml_tags: Vec<String>,
/// Number of occurrences of each tag name.
pub xml_tag_counts: std::collections::HashMap<String, usize>,
/// Whether the prompt contains `crate::prompt::CACHE_BOUNDARY`.
pub has_cache_boundary: bool,
/// Result of `crate::prompt::cache::estimate_tokens` for the prompt.
pub estimated_tokens: usize,
/// Size of the prompt text as computed by `PromptDumper::analyze_prompt`.
pub char_count: usize,
/// Number of lines in the prompt (`str::lines().count()`).
pub line_count: usize,
}
impl PromptAnalysis {
    /// Prints a human-readable summary of the analysis to stdout.
    ///
    /// The "unique" figure is the number of distinct tag names. `xml_tags`
    /// stores one element per occurrence, so its raw length would over-count
    /// whenever a tag is repeated.
    pub fn print_summary(&self) {
        let unique_tags = self
            .xml_tags
            .iter()
            .collect::<std::collections::HashSet<_>>()
            .len();

        println!("Prompt Analysis Summary:");
        println!(" Sections: {:?}", self.sections);
        println!(
            " XML tags: {} unique, {:?} counts",
            unique_tags, self.xml_tag_counts
        );
        println!(" Cache boundary: {}", self.has_cache_boundary);
        println!(" Tokens estimate: {}", self.estimated_tokens);
        println!(" Characters: {}", self.char_count);
        println!(" Lines: {}", self.line_count);
    }
}
pub fn read_dump_file<P: AsRef<Path>>(path: P) -> Vec<DumpEntry> {
let path = path.as_ref();
if !path.exists() {
return Vec::new();
}
let content = std::fs::read_to_string(path).unwrap_or_default();
content
.lines()
.filter_map(|line| serde_json::from_str::<DumpEntry>(line).ok())
.collect()
}
pub fn analyze_dump_file<P: AsRef<Path>>(path: P) -> DumpFileAnalysis {
let entries = read_dump_file(path);
let mut analysis = DumpFileAnalysis::default();
analysis.total_entries = entries.len();
for entry in &entries {
analysis.total_tokens += entry.total_tokens;
analysis.avg_tokens += entry.total_tokens;
analysis
.profile_counts
.entry(entry.profile.clone())
.and_modify(|c| *c += 1)
.or_insert(1);
if entry.cache_efficiency > analysis.max_cache_efficiency {
analysis.max_cache_efficiency = entry.cache_efficiency;
}
if entry.cache_efficiency < analysis.min_cache_efficiency
|| analysis.min_cache_efficiency == 0.0
{
analysis.min_cache_efficiency = entry.cache_efficiency;
}
}
if analysis.total_entries > 0 {
analysis.avg_tokens /= analysis.total_entries;
analysis.avg_cache_efficiency =
entries.iter().map(|e| e.cache_efficiency).sum::<f64>() / analysis.total_entries as f64;
}
analysis
}
/// Aggregate statistics over the entries of a dump file, as produced by
/// `analyze_dump_file`. All fields are zero/empty when the file has no entries.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DumpFileAnalysis {
/// Number of entries that were read from the file.
pub total_entries: usize,
/// Sum of `total_tokens` over all entries.
pub total_tokens: usize,
/// `total_tokens` divided by `total_entries` (integer division).
pub avg_tokens: usize,
/// Number of entries per profile name.
pub profile_counts: std::collections::HashMap<String, usize>,
/// Largest `cache_efficiency` reported for the entries.
pub max_cache_efficiency: f64,
/// Smallest `cache_efficiency` reported for the entries.
pub min_cache_efficiency: f64,
/// Arithmetic mean of `cache_efficiency` over all entries.
pub avg_cache_efficiency: f64,
}
impl DumpFileAnalysis {
    /// Prints the aggregated statistics to stdout, one figure per line.
    pub fn print_summary(&self) {
        let Self {
            total_entries,
            total_tokens,
            avg_tokens,
            profile_counts,
            max_cache_efficiency,
            min_cache_efficiency,
            avg_cache_efficiency,
        } = self;

        println!("Dump File Analysis:");
        println!(" Total entries: {}", total_entries);
        println!(" Total tokens: {}", total_tokens);
        println!(" Average tokens: {}", avg_tokens);
        println!(" Profile distribution: {:?}", profile_counts);
        println!(
            " Cache efficiency: min {:.1}%, max {:.1}%, avg {:.1}%",
            min_cache_efficiency, max_cache_efficiency, avg_cache_efficiency
        );
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `from_prompt` carries profile, prompt text and session id into the entry.
    #[test]
    fn test_dump_entry_creation() {
        let workdir = std::env::current_dir().unwrap();
        let mut builder = crate::prompt::PromptOrchestrator::new(workdir);
        let section = crate::prompt::PromptSection::static_section("test", "test content");
        builder.add_section(section);
        let assembled = builder.assemble();

        let entry = DumpEntry::from_prompt(&assembled, Some("session-1".to_string()));

        assert_eq!(entry.profile, "default");
        assert!(entry.prompt.contains("test"));
        assert_eq!(entry.session_id.as_deref(), Some("session-1"));
    }

    /// With only printing enabled, a dumped prompt stays in the in-memory buffer.
    #[test]
    fn test_dumper_basic() {
        let workdir = std::env::current_dir().unwrap();
        let mut builder = crate::prompt::PromptOrchestrator::new(workdir);
        let section = crate::prompt::PromptSection::static_section("identity", "You are AI");
        builder.add_section(section);
        let assembled = builder.assemble();

        let mut dumper = PromptDumper::new().enable_print();
        dumper.dump(&assembled);

        assert_eq!(dumper.entries().len(), 1);
    }

    /// `analyze_prompt` picks up section markers and opening tags.
    #[test]
    fn test_analyze_prompt() {
        let text = "[identity]\nYou are AI\n\n<context>\nSome context\n</context>";

        let report = PromptDumper::analyze_prompt(text);

        assert!(report.sections.iter().any(|name| name == "identity"));
        assert!(report.xml_tags.iter().any(|name| name == "context"));
        assert!(!report.has_cache_boundary);
        assert!(report.estimated_tokens > 0);
    }

    /// Smoke test: printing a summary must not panic.
    #[test]
    fn test_prompt_analysis_summary() {
        let report = PromptDumper::analyze_prompt("[test]\nContent");
        report.print_summary();
    }

    /// Entries flushed to a file can be read back and aggregated.
    #[test]
    fn test_dump_file_analysis() {
        let scratch = tempfile::NamedTempFile::new().unwrap();
        let dump_path = scratch.path();

        let workdir = std::env::current_dir().unwrap();
        let mut builder = crate::prompt::PromptOrchestrator::new(workdir);
        let section = crate::prompt::PromptSection::static_section("test", "content");
        builder.add_section(section);

        let mut dumper = PromptDumper::new()
            .enable_file_dump(dump_path)
            .with_session("test-session".to_string());
        for _round in 0..5 {
            let assembled = builder.assemble();
            dumper.dump(&assembled);
        }
        dumper.flush();

        let summary = analyze_dump_file(dump_path);
        assert_eq!(summary.total_entries, 5);
        assert!(summary.avg_tokens > 0);
        summary.print_summary();
    }
}