reasonkit/thinktool/
trace.rs

//! Execution trace types for auditability
//!
//! Every protocol execution is traced step-by-step for:
//! - Debugging: Identify where reasoning went wrong
//! - Auditability: Know exactly how conclusions were reached
//! - Reproducibility: Re-run with the same inputs
//! - Learning: Improve protocols based on traces
8
9use chrono::{DateTime, Utc};
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13use super::step::{StepOutput, TokenUsage};
14
15/// A complete execution trace
16#[derive(Debug, Clone, Serialize, Deserialize, Default)]
17pub struct ExecutionTrace {
18    /// Unique trace identifier
19    pub id: Uuid,
20
21    /// Protocol that was executed
22    pub protocol_id: String,
23
24    /// Protocol version
25    pub protocol_version: String,
26
27    /// Input provided to the protocol
28    pub input: serde_json::Value,
29
30    /// Step-by-step execution record
31    pub steps: Vec<StepTrace>,
32
33    /// Final output (if completed)
34    pub output: Option<serde_json::Value>,
35
36    /// Overall execution status
37    pub status: ExecutionStatus,
38
39    /// Timing information
40    pub timing: TimingInfo,
41
42    /// Total token usage
43    pub tokens: TokenUsage,
44
45    /// Overall confidence
46    pub confidence: f64,
47
48    /// Execution metadata
49    pub metadata: TraceMetadata,
50}
51
52/// Trace of a single step execution
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct StepTrace {
55    /// Step identifier
56    pub step_id: String,
57
58    /// Step index (0-based)
59    pub index: usize,
60
61    /// Actual prompt sent to LLM
62    pub prompt: String,
63
64    /// Raw LLM response
65    pub raw_response: String,
66
67    /// Parsed/structured output
68    pub parsed_output: StepOutput,
69
70    /// Step confidence score
71    pub confidence: f64,
72
73    /// Step execution time in milliseconds
74    pub duration_ms: u64,
75
76    /// Tokens used for this step
77    pub tokens: TokenUsage,
78
79    /// Step status
80    pub status: StepStatus,
81
82    /// Error message (if failed)
83    pub error: Option<String>,
84
85    /// Timestamp when step started
86    pub started_at: DateTime<Utc>,
87
88    /// Timestamp when step completed
89    pub completed_at: Option<DateTime<Utc>>,
90}
91
92/// Overall execution status
93#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
94#[serde(rename_all = "snake_case")]
95#[derive(Default)]
96pub enum ExecutionStatus {
97    /// Execution is running
98    #[default]
99    Running,
100    /// All steps completed successfully
101    Completed,
102    /// Execution failed
103    Failed,
104    /// Execution was cancelled
105    Cancelled,
106    /// Execution timed out
107    TimedOut,
108    /// Paused (can be resumed)
109    Paused,
110}
111
112/// Individual step status
113#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
114#[serde(rename_all = "snake_case")]
115#[derive(Default)]
116pub enum StepStatus {
117    /// Waiting to execute
118    #[default]
119    Pending,
120    /// Currently executing
121    Running,
122    /// Completed successfully
123    Completed,
124    /// Failed with error
125    Failed,
126    /// Skipped (condition not met)
127    Skipped,
128}
129
130/// Timing information for execution
131#[derive(Debug, Clone, Default, Serialize, Deserialize)]
132pub struct TimingInfo {
133    /// When execution started
134    pub started_at: Option<DateTime<Utc>>,
135
136    /// When execution completed
137    pub completed_at: Option<DateTime<Utc>>,
138
139    /// Total duration in milliseconds
140    pub total_duration_ms: u64,
141
142    /// Time spent in LLM calls
143    pub llm_duration_ms: u64,
144
145    /// Time spent in local processing
146    pub processing_duration_ms: u64,
147}
148
149impl TimingInfo {
150    /// Start timing
151    pub fn start(&mut self) {
152        self.started_at = Some(Utc::now());
153    }
154
155    /// Complete timing
156    pub fn complete(&mut self) {
157        self.completed_at = Some(Utc::now());
158        if let Some(start) = self.started_at {
159            self.total_duration_ms = (Utc::now() - start).num_milliseconds() as u64;
160        }
161    }
162}
163
164/// Execution metadata
165#[derive(Debug, Clone, Default, Serialize, Deserialize)]
166pub struct TraceMetadata {
167    /// LLM model used
168    pub model: Option<String>,
169
170    /// LLM provider (openai, anthropic, etc.)
171    pub provider: Option<String>,
172
173    /// Temperature setting
174    pub temperature: Option<f64>,
175
176    /// Profile used (if any)
177    pub profile: Option<String>,
178
179    /// User-provided tags
180    #[serde(default)]
181    pub tags: Vec<String>,
182
183    /// Environment info
184    pub environment: Option<String>,
185}
186
187impl ExecutionTrace {
188    /// Create a new execution trace
189    pub fn new(protocol_id: impl Into<String>, protocol_version: impl Into<String>) -> Self {
190        Self {
191            id: Uuid::new_v4(),
192            protocol_id: protocol_id.into(),
193            protocol_version: protocol_version.into(),
194            input: serde_json::Value::Null,
195            steps: Vec::new(),
196            output: None,
197            status: ExecutionStatus::Running,
198            timing: TimingInfo::default(),
199            tokens: TokenUsage::default(),
200            confidence: 0.0,
201            metadata: TraceMetadata::default(),
202        }
203    }
204
205    /// Set input
206    pub fn with_input(mut self, input: serde_json::Value) -> Self {
207        self.input = input;
208        self
209    }
210
211    /// Add a step trace
212    pub fn add_step(&mut self, step: StepTrace) {
213        self.tokens.add(&step.tokens);
214        self.steps.push(step);
215    }
216
217    /// Mark as completed
218    pub fn complete(&mut self, output: serde_json::Value, confidence: f64) {
219        self.output = Some(output);
220        self.confidence = confidence;
221        self.status = ExecutionStatus::Completed;
222        self.timing.complete();
223    }
224
225    /// Mark as failed
226    pub fn fail(&mut self, error: &str) {
227        self.status = ExecutionStatus::Failed;
228        self.timing.complete();
229        // Add error to last step if exists
230        if let Some(last) = self.steps.last_mut() {
231            last.error = Some(error.to_string());
232            last.status = StepStatus::Failed;
233        }
234    }
235
236    /// Get completed step count
237    pub fn completed_steps(&self) -> usize {
238        self.steps
239            .iter()
240            .filter(|s| s.status == StepStatus::Completed)
241            .count()
242    }
243
244    /// Get average step confidence
245    pub fn average_confidence(&self) -> f64 {
246        if self.steps.is_empty() {
247            return 0.0;
248        }
249        let sum: f64 = self.steps.iter().map(|s| s.confidence).sum();
250        sum / self.steps.len() as f64
251    }
252
253    /// Export trace to JSON string
254    pub fn to_json(&self) -> Result<String, serde_json::Error> {
255        serde_json::to_string_pretty(self)
256    }
257
258    /// Export trace to compact JSON
259    pub fn to_json_compact(&self) -> Result<String, serde_json::Error> {
260        serde_json::to_string(self)
261    }
262}
263
264impl StepTrace {
265    /// Create a new step trace
266    pub fn new(step_id: impl Into<String>, index: usize) -> Self {
267        Self {
268            step_id: step_id.into(),
269            index,
270            prompt: String::new(),
271            raw_response: String::new(),
272            parsed_output: StepOutput::Empty,
273            confidence: 0.0,
274            duration_ms: 0,
275            tokens: TokenUsage::default(),
276            status: StepStatus::Pending,
277            error: None,
278            started_at: Utc::now(),
279            completed_at: None,
280        }
281    }
282
283    /// Mark step as running
284    pub fn start(&mut self) {
285        self.status = StepStatus::Running;
286        self.started_at = Utc::now();
287    }
288
289    /// Mark step as completed
290    pub fn complete(&mut self, output: StepOutput, confidence: f64) {
291        self.status = StepStatus::Completed;
292        self.parsed_output = output;
293        self.confidence = confidence;
294        self.completed_at = Some(Utc::now());
295        self.duration_ms = (Utc::now() - self.started_at).num_milliseconds() as u64;
296    }
297
298    /// Mark step as failed
299    pub fn fail(&mut self, error: impl Into<String>) {
300        self.status = StepStatus::Failed;
301        self.error = Some(error.into());
302        self.completed_at = Some(Utc::now());
303    }
304}
305
306#[cfg(test)]
307mod tests {
308    use super::*;
309    use serde_json::json;
310    use std::collections::HashMap;
311
312    // =========================================================================
313    // SECTION 1: ExecutionTrace Creation Tests
314    // =========================================================================
315
316    #[test]
317    fn test_execution_trace_creation() {
318        let trace = ExecutionTrace::new("gigathink", "1.0.0");
319
320        assert_eq!(trace.protocol_id, "gigathink");
321        assert_eq!(trace.protocol_version, "1.0.0");
322        assert_eq!(trace.status, ExecutionStatus::Running);
323        assert!(trace.steps.is_empty());
324        assert!(trace.output.is_none());
325        assert_eq!(trace.confidence, 0.0);
326        assert_eq!(trace.tokens.total_tokens, 0);
327    }
328
329    #[test]
330    fn test_execution_trace_unique_id() {
331        let trace1 = ExecutionTrace::new("test", "1.0.0");
332        let trace2 = ExecutionTrace::new("test", "1.0.0");
333
334        // Each trace should have a unique UUID
335        assert_ne!(trace1.id, trace2.id);
336    }
337
338    #[test]
339    fn test_execution_trace_default() {
340        let trace = ExecutionTrace::default();
341
342        assert_eq!(trace.protocol_id, "");
343        assert_eq!(trace.protocol_version, "");
344        assert_eq!(trace.status, ExecutionStatus::Running);
345        assert!(trace.steps.is_empty());
346    }
347
348    #[test]
349    fn test_execution_trace_with_input() {
350        let input = json!({
351            "query": "What is Rust?",
352            "context": ["systems programming", "memory safety"]
353        });
354
355        let trace = ExecutionTrace::new("laserlogic", "2.0.0").with_input(input.clone());
356
357        assert_eq!(trace.input, input);
358        assert_eq!(trace.input["query"], "What is Rust?");
359    }
360
361    #[test]
362    fn test_execution_trace_with_complex_input() {
363        let input = json!({
364            "nested": {
365                "deeply": {
366                    "value": 42,
367                    "array": [1, 2, 3]
368                }
369            },
370            "boolean": true,
371            "null_value": null
372        });
373
374        let trace = ExecutionTrace::new("test", "1.0.0").with_input(input.clone());
375
376        assert_eq!(trace.input["nested"]["deeply"]["value"], 42);
377        assert_eq!(trace.input["boolean"], true);
378        assert!(trace.input["null_value"].is_null());
379    }
380
381    // =========================================================================
382    // SECTION 2: StepTrace Creation and Lifecycle Tests
383    // =========================================================================
384
385    #[test]
386    fn test_step_trace_creation() {
387        let step = StepTrace::new("analyze", 0);
388
389        assert_eq!(step.step_id, "analyze");
390        assert_eq!(step.index, 0);
391        assert_eq!(step.status, StepStatus::Pending);
392        assert!(step.prompt.is_empty());
393        assert!(step.raw_response.is_empty());
394        assert!(step.error.is_none());
395        assert!(step.completed_at.is_none());
396    }
397
398    #[test]
399    fn test_step_trace_start() {
400        let mut step = StepTrace::new("step1", 0);
401        let before_start = Utc::now();
402
403        step.start();
404
405        assert_eq!(step.status, StepStatus::Running);
406        assert!(step.started_at >= before_start);
407    }
408
409    #[test]
410    fn test_step_trace_complete() {
411        let mut step = StepTrace::new("step1", 0);
412        step.start();
413
414        let output = StepOutput::Text {
415            content: "Analysis complete".to_string(),
416        };
417        step.complete(output.clone(), 0.95);
418
419        assert_eq!(step.status, StepStatus::Completed);
420        assert_eq!(step.confidence, 0.95);
421        assert!(step.completed_at.is_some());
422        // duration_ms is u64; just ensure it's available.
423        let _ = step.duration_ms;
424
425        // Verify output was stored
426        if let StepOutput::Text { content } = &step.parsed_output {
427            assert_eq!(content, "Analysis complete");
428        } else {
429            panic!("Expected Text output");
430        }
431    }
432
433    #[test]
434    fn test_step_trace_fail() {
435        let mut step = StepTrace::new("step1", 0);
436        step.start();
437
438        step.fail("LLM timeout occurred");
439
440        assert_eq!(step.status, StepStatus::Failed);
441        assert_eq!(step.error, Some("LLM timeout occurred".to_string()));
442        assert!(step.completed_at.is_some());
443    }
444
445    #[test]
446    fn test_step_trace_with_prompt_and_response() {
447        let mut step = StepTrace::new("reasoning", 1);
448        step.prompt = "Analyze the following code for bugs...".to_string();
449        step.raw_response = "I found 3 potential issues: 1) null pointer...".to_string();
450
451        assert!(step.prompt.contains("Analyze"));
452        assert!(step.raw_response.contains("3 potential issues"));
453    }
454
455    #[test]
456    fn test_step_trace_with_tokens() {
457        let mut step = StepTrace::new("step1", 0);
458        step.tokens = TokenUsage::new(500, 200, 0.0035);
459
460        assert_eq!(step.tokens.input_tokens, 500);
461        assert_eq!(step.tokens.output_tokens, 200);
462        assert_eq!(step.tokens.total_tokens, 700);
463        assert!((step.tokens.cost_usd - 0.0035).abs() < 0.0001);
464    }
465
466    // =========================================================================
467    // SECTION 3: Nested Trace Spans (Steps within Traces)
468    // =========================================================================
469
470    #[test]
471    fn test_trace_with_single_step() {
472        let mut trace = ExecutionTrace::new("test", "1.0.0");
473
474        let mut step = StepTrace::new("step1", 0);
475        step.tokens = TokenUsage::new(100, 50, 0.001);
476        step.complete(
477            StepOutput::Text {
478                content: "Hello".to_string(),
479            },
480            0.9,
481        );
482
483        trace.add_step(step);
484
485        assert_eq!(trace.steps.len(), 1);
486        assert_eq!(trace.tokens.total_tokens, 150);
487        assert_eq!(trace.completed_steps(), 1);
488    }
489
490    #[test]
491    fn test_trace_with_multiple_steps() {
492        let mut trace = ExecutionTrace::new("powercombo", "1.0.0");
493
494        // Add 5 steps simulating a full ThinkTool pipeline
495        let step_configs = [
496            ("gigathink", 200, 150, 0.85),
497            ("laserlogic", 180, 120, 0.90),
498            ("bedrock", 250, 200, 0.88),
499            ("proofguard", 300, 250, 0.92),
500            ("brutalhonesty", 150, 100, 0.95),
501        ];
502
503        for (i, (name, input_tok, output_tok, conf)) in step_configs.iter().enumerate() {
504            let mut step = StepTrace::new(*name, i);
505            step.tokens = TokenUsage::new(*input_tok, *output_tok, 0.001);
506            step.complete(
507                StepOutput::Text {
508                    content: format!("{} output", name),
509                },
510                *conf,
511            );
512            trace.add_step(step);
513        }
514
515        assert_eq!(trace.steps.len(), 5);
516        assert_eq!(trace.completed_steps(), 5);
517
518        // Verify token aggregation
519        let expected_total: u32 = step_configs.iter().map(|(_, i, o, _)| i + o).sum();
520        assert_eq!(trace.tokens.total_tokens, expected_total);
521    }
522
523    #[test]
524    fn test_trace_with_mixed_step_statuses() {
525        let mut trace = ExecutionTrace::new("test", "1.0.0");
526
527        // Step 1: Completed
528        let mut step1 = StepTrace::new("step1", 0);
529        step1.complete(StepOutput::Empty, 0.9);
530        trace.add_step(step1);
531
532        // Step 2: Failed
533        let mut step2 = StepTrace::new("step2", 1);
534        step2.fail("Validation error");
535        trace.add_step(step2);
536
537        // Step 3: Still pending
538        let step3 = StepTrace::new("step3", 2);
539        trace.add_step(step3);
540
541        assert_eq!(trace.steps.len(), 3);
542        assert_eq!(trace.completed_steps(), 1);
543        assert_eq!(trace.steps[1].status, StepStatus::Failed);
544        assert_eq!(trace.steps[2].status, StepStatus::Pending);
545    }
546
547    #[test]
548    fn test_trace_step_ordering() {
549        let mut trace = ExecutionTrace::new("test", "1.0.0");
550
551        for i in 0..10 {
552            let step = StepTrace::new(format!("step_{}", i), i);
553            trace.add_step(step);
554        }
555
556        // Verify steps maintain insertion order
557        for (i, step) in trace.steps.iter().enumerate() {
558            assert_eq!(step.index, i);
559            assert_eq!(step.step_id, format!("step_{}", i));
560        }
561    }
562
563    // =========================================================================
564    // SECTION 4: Trace Metadata Tests
565    // =========================================================================
566
567    #[test]
568    fn test_trace_metadata_default() {
569        let metadata = TraceMetadata::default();
570
571        assert!(metadata.model.is_none());
572        assert!(metadata.provider.is_none());
573        assert!(metadata.temperature.is_none());
574        assert!(metadata.profile.is_none());
575        assert!(metadata.tags.is_empty());
576        assert!(metadata.environment.is_none());
577    }
578
579    #[test]
580    fn test_trace_metadata_full() {
581        let metadata = TraceMetadata {
582            model: Some("claude-sonnet-4-5".to_string()),
583            provider: Some("anthropic".to_string()),
584            temperature: Some(0.7),
585            profile: Some("paranoid".to_string()),
586            tags: vec!["production".to_string(), "critical".to_string()],
587            environment: Some("aws-us-east-1".to_string()),
588        };
589
590        assert_eq!(metadata.model, Some("claude-sonnet-4-5".to_string()));
591        assert_eq!(metadata.provider, Some("anthropic".to_string()));
592        assert_eq!(metadata.temperature, Some(0.7));
593        assert_eq!(metadata.profile, Some("paranoid".to_string()));
594        assert_eq!(metadata.tags.len(), 2);
595        assert!(metadata.tags.contains(&"production".to_string()));
596    }
597
598    #[test]
599    fn test_trace_with_metadata() {
600        let mut trace = ExecutionTrace::new("test", "1.0.0");
601        trace.metadata.model = Some("gpt-4".to_string());
602        trace.metadata.provider = Some("openai".to_string());
603        trace.metadata.temperature = Some(0.5);
604
605        assert_eq!(trace.metadata.model, Some("gpt-4".to_string()));
606        assert_eq!(trace.metadata.provider, Some("openai".to_string()));
607    }
608
609    // =========================================================================
610    // SECTION 5: Timing Information Tests
611    // =========================================================================
612
613    #[test]
614    fn test_timing_info_default() {
615        let timing = TimingInfo::default();
616
617        assert!(timing.started_at.is_none());
618        assert!(timing.completed_at.is_none());
619        assert_eq!(timing.total_duration_ms, 0);
620        assert_eq!(timing.llm_duration_ms, 0);
621        assert_eq!(timing.processing_duration_ms, 0);
622    }
623
624    #[test]
625    fn test_timing_info_start() {
626        let mut timing = TimingInfo::default();
627        let before = Utc::now();
628
629        timing.start();
630
631        assert!(timing.started_at.is_some());
632        assert!(timing.started_at.unwrap() >= before);
633        assert!(timing.completed_at.is_none());
634    }
635
636    #[test]
637    fn test_timing_info_complete() {
638        let mut timing = TimingInfo::default();
639        timing.start();
640
641        // Small delay to ensure measurable duration
642        std::thread::sleep(std::time::Duration::from_millis(10));
643
644        timing.complete();
645
646        assert!(timing.completed_at.is_some());
647        assert!(timing.total_duration_ms >= 10);
648        assert!(timing.completed_at.unwrap() > timing.started_at.unwrap());
649    }
650
651    #[test]
652    fn test_timing_info_complete_without_start() {
653        let mut timing = TimingInfo::default();
654
655        // Complete without starting - should handle gracefully
656        timing.complete();
657
658        assert!(timing.completed_at.is_some());
659        // Duration should remain 0 since there's no start time
660        assert_eq!(timing.total_duration_ms, 0);
661    }
662
663    #[test]
664    fn test_step_timing_captures_duration() {
665        let mut step = StepTrace::new("timed_step", 0);
666        step.start();
667
668        std::thread::sleep(std::time::Duration::from_millis(15));
669
670        step.complete(StepOutput::Empty, 0.9);
671
672        // Duration should be at least 15ms
673        assert!(step.duration_ms >= 15);
674        assert!(step.completed_at.is_some());
675    }
676
677    // =========================================================================
678    // SECTION 6: Average Confidence Tests
679    // =========================================================================
680
681    #[test]
682    fn test_average_confidence_empty() {
683        let trace = ExecutionTrace::new("test", "1.0.0");
684        assert_eq!(trace.average_confidence(), 0.0);
685    }
686
687    #[test]
688    fn test_average_confidence_single_step() {
689        let mut trace = ExecutionTrace::new("test", "1.0.0");
690
691        let mut step = StepTrace::new("step1", 0);
692        step.confidence = 0.85;
693        trace.add_step(step);
694
695        assert!((trace.average_confidence() - 0.85).abs() < 0.001);
696    }
697
698    #[test]
699    fn test_average_confidence_multiple_steps() {
700        let mut trace = ExecutionTrace::new("test", "1.0.0");
701
702        for (i, conf) in [0.8, 0.9, 0.7].iter().enumerate() {
703            let mut step = StepTrace::new(format!("step{}", i), i);
704            step.confidence = *conf;
705            step.status = StepStatus::Completed;
706            trace.add_step(step);
707        }
708
709        // (0.8 + 0.9 + 0.7) / 3 = 0.8
710        assert!((trace.average_confidence() - 0.8).abs() < 0.001);
711    }
712
713    #[test]
714    fn test_average_confidence_includes_failed_steps() {
715        let mut trace = ExecutionTrace::new("test", "1.0.0");
716
717        let mut step1 = StepTrace::new("step1", 0);
718        step1.confidence = 0.9;
719        step1.status = StepStatus::Completed;
720        trace.add_step(step1);
721
722        let mut step2 = StepTrace::new("step2", 1);
723        step2.confidence = 0.0; // Failed step with 0 confidence
724        step2.status = StepStatus::Failed;
725        trace.add_step(step2);
726
727        // Average includes both: (0.9 + 0.0) / 2 = 0.45
728        assert!((trace.average_confidence() - 0.45).abs() < 0.001);
729    }
730
731    // =========================================================================
732    // SECTION 7: Trace Completion and Failure Tests
733    // =========================================================================
734
735    #[test]
736    fn test_trace_complete() {
737        let mut trace = ExecutionTrace::new("test", "1.0.0");
738        trace.timing.start();
739
740        let output = json!({"result": "success", "data": [1, 2, 3]});
741        trace.complete(output.clone(), 0.92);
742
743        assert_eq!(trace.status, ExecutionStatus::Completed);
744        assert_eq!(trace.confidence, 0.92);
745        assert!(trace.output.is_some());
746        assert_eq!(trace.output.as_ref().unwrap()["result"], "success");
747        assert!(trace.timing.completed_at.is_some());
748    }
749
750    #[test]
751    fn test_trace_fail_with_steps() {
752        let mut trace = ExecutionTrace::new("test", "1.0.0");
753        trace.timing.start();
754
755        // Add a step
756        let mut step = StepTrace::new("step1", 0);
757        step.start();
758        trace.add_step(step);
759
760        // Fail the trace
761        trace.fail("Connection timeout");
762
763        assert_eq!(trace.status, ExecutionStatus::Failed);
764        assert!(trace.timing.completed_at.is_some());
765
766        // Last step should be marked as failed
767        let last_step = trace.steps.last().unwrap();
768        assert_eq!(last_step.status, StepStatus::Failed);
769        assert_eq!(last_step.error, Some("Connection timeout".to_string()));
770    }
771
772    #[test]
773    fn test_trace_fail_without_steps() {
774        let mut trace = ExecutionTrace::new("test", "1.0.0");
775        trace.timing.start();
776
777        // Fail without any steps - should not panic
778        trace.fail("Early failure");
779
780        assert_eq!(trace.status, ExecutionStatus::Failed);
781        assert!(trace.steps.is_empty());
782    }
783
784    // =========================================================================
785    // SECTION 8: JSON Export Tests
786    // =========================================================================
787
788    #[test]
789    fn test_trace_to_json() {
790        let trace =
791            ExecutionTrace::new("test_protocol", "1.0.0").with_input(json!({"query": "test"}));
792
793        let json_str = trace.to_json().expect("JSON serialization should succeed");
794
795        assert!(json_str.contains("test_protocol"));
796        assert!(json_str.contains("1.0.0"));
797        assert!(json_str.contains("query"));
798        // Pretty format should have newlines
799        assert!(json_str.contains('\n'));
800    }
801
802    #[test]
803    fn test_trace_to_json_compact() {
804        let trace = ExecutionTrace::new("test", "1.0.0");
805
806        let json_str = trace
807            .to_json_compact()
808            .expect("Compact JSON should succeed");
809
810        // Compact format should not have pretty-print newlines (except in strings)
811        let lines: Vec<&str> = json_str.lines().collect();
812        assert_eq!(lines.len(), 1);
813    }
814
815    #[test]
816    fn test_trace_json_roundtrip() {
817        let mut original = ExecutionTrace::new("roundtrip", "2.0.0")
818            .with_input(json!({"key": "value", "number": 42}));
819
820        original.metadata.model = Some("test-model".to_string());
821        original.metadata.tags = vec!["tag1".to_string(), "tag2".to_string()];
822
823        let mut step = StepTrace::new("step1", 0);
824        step.prompt = "Test prompt".to_string();
825        step.raw_response = "Test response".to_string();
826        step.tokens = TokenUsage::new(100, 50, 0.001);
827        step.complete(
828            StepOutput::Text {
829                content: "Result".to_string(),
830            },
831            0.88,
832        );
833        original.add_step(step);
834
835        original.complete(json!({"final": "output"}), 0.9);
836
837        // Serialize
838        let json_str = original.to_json().expect("Serialization should succeed");
839
840        // Deserialize
841        let deserialized: ExecutionTrace =
842            serde_json::from_str(&json_str).expect("Deserialization should succeed");
843
844        // Verify key fields
845        assert_eq!(deserialized.protocol_id, original.protocol_id);
846        assert_eq!(deserialized.protocol_version, original.protocol_version);
847        assert_eq!(deserialized.id, original.id);
848        assert_eq!(deserialized.status, ExecutionStatus::Completed);
849        assert_eq!(deserialized.confidence, 0.9);
850        assert_eq!(deserialized.steps.len(), 1);
851        assert_eq!(deserialized.metadata.model, Some("test-model".to_string()));
852        assert_eq!(deserialized.metadata.tags.len(), 2);
853    }
854
855    #[test]
856    fn test_trace_json_with_all_step_outputs() {
857        let mut trace = ExecutionTrace::new("output_types", "1.0.0");
858
859        // Text output
860        let mut step1 = StepTrace::new("text_step", 0);
861        step1.complete(
862            StepOutput::Text {
863                content: "Hello world".to_string(),
864            },
865            0.9,
866        );
867        trace.add_step(step1);
868
869        // List output
870        let mut step2 = StepTrace::new("list_step", 1);
871        step2.complete(
872            StepOutput::List {
873                items: vec![
874                    super::super::step::ListItem::new("Item 1"),
875                    super::super::step::ListItem::with_confidence("Item 2", 0.95),
876                ],
877            },
878            0.85,
879        );
880        trace.add_step(step2);
881
882        // Structured output
883        let mut step3 = StepTrace::new("struct_step", 2);
884        let mut data = HashMap::new();
885        data.insert("key1".to_string(), json!("value1"));
886        data.insert("key2".to_string(), json!(123));
887        step3.complete(StepOutput::Structured { data }, 0.88);
888        trace.add_step(step3);
889
890        // Score output
891        let mut step4 = StepTrace::new("score_step", 3);
892        step4.complete(StepOutput::Score { value: 0.75 }, 0.92);
893        trace.add_step(step4);
894
895        // Boolean output
896        let mut step5 = StepTrace::new("bool_step", 4);
897        step5.complete(
898            StepOutput::Boolean {
899                value: true,
900                reason: Some("Validation passed".to_string()),
901            },
902            0.99,
903        );
904        trace.add_step(step5);
905
906        let json_str = trace.to_json().expect("Should serialize all output types");
907
908        // Verify all types are present
909        assert!(json_str.contains("Hello world"));
910        assert!(json_str.contains("Item 1"));
911        assert!(json_str.contains("key1"));
912        assert!(json_str.contains("0.75"));
913        assert!(json_str.contains("Validation passed"));
914
915        // Verify roundtrip
916        let deserialized: ExecutionTrace =
917            serde_json::from_str(&json_str).expect("Should deserialize");
918        assert_eq!(deserialized.steps.len(), 5);
919    }
920
921    // =========================================================================
922    // SECTION 9: Execution Status Tests
923    // =========================================================================
924
925    #[test]
926    fn test_execution_status_default() {
927        assert_eq!(ExecutionStatus::default(), ExecutionStatus::Running);
928    }
929
930    #[test]
931    fn test_execution_status_serialization() {
932        let statuses = vec![
933            (ExecutionStatus::Running, "\"running\""),
934            (ExecutionStatus::Completed, "\"completed\""),
935            (ExecutionStatus::Failed, "\"failed\""),
936            (ExecutionStatus::Cancelled, "\"cancelled\""),
937            (ExecutionStatus::TimedOut, "\"timed_out\""),
938            (ExecutionStatus::Paused, "\"paused\""),
939        ];
940
941        for (status, expected) in statuses {
942            let json = serde_json::to_string(&status).expect("Should serialize");
943            assert_eq!(json, expected);
944
945            let deserialized: ExecutionStatus =
946                serde_json::from_str(&json).expect("Should deserialize");
947            assert_eq!(deserialized, status);
948        }
949    }
950
951    #[test]
952    fn test_step_status_default() {
953        assert_eq!(StepStatus::default(), StepStatus::Pending);
954    }
955
956    #[test]
957    fn test_step_status_serialization() {
958        let statuses = vec![
959            (StepStatus::Pending, "\"pending\""),
960            (StepStatus::Running, "\"running\""),
961            (StepStatus::Completed, "\"completed\""),
962            (StepStatus::Failed, "\"failed\""),
963            (StepStatus::Skipped, "\"skipped\""),
964        ];
965
966        for (status, expected) in statuses {
967            let json = serde_json::to_string(&status).expect("Should serialize");
968            assert_eq!(json, expected);
969        }
970    }
971
972    // =========================================================================
973    // SECTION 10: Token Aggregation Tests
974    // =========================================================================
975
976    #[test]
977    fn test_token_aggregation_across_steps() {
978        let mut trace = ExecutionTrace::new("test", "1.0.0");
979
980        let step1 = {
981            let mut s = StepTrace::new("s1", 0);
982            s.tokens = TokenUsage::new(100, 50, 0.001);
983            s
984        };
985
986        let step2 = {
987            let mut s = StepTrace::new("s2", 1);
988            s.tokens = TokenUsage::new(200, 100, 0.002);
989            s
990        };
991
992        let step3 = {
993            let mut s = StepTrace::new("s3", 2);
994            s.tokens = TokenUsage::new(150, 75, 0.0015);
995            s
996        };
997
998        trace.add_step(step1);
999        trace.add_step(step2);
1000        trace.add_step(step3);
1001
1002        assert_eq!(trace.tokens.input_tokens, 450);
1003        assert_eq!(trace.tokens.output_tokens, 225);
1004        assert_eq!(trace.tokens.total_tokens, 675);
1005        assert!((trace.tokens.cost_usd - 0.0045).abs() < 0.0001);
1006    }
1007
1008    // =========================================================================
1009    // SECTION 11: Edge Cases and Boundary Conditions
1010    // =========================================================================
1011
1012    #[test]
1013    fn test_empty_string_protocol_id() {
1014        let trace = ExecutionTrace::new("", "");
1015
1016        assert_eq!(trace.protocol_id, "");
1017        assert_eq!(trace.protocol_version, "");
1018
1019        // Should still serialize
1020        let json = trace.to_json().expect("Should serialize empty strings");
1021        assert!(json.contains("\"protocol_id\": \"\""));
1022    }
1023
1024    #[test]
1025    fn test_unicode_in_trace() {
1026        let trace = ExecutionTrace::new("test", "1.0.0")
1027            .with_input(json!({"query": "What is the meaning of life? "}));
1028
1029        let mut step = StepTrace::new("step1", 0);
1030        step.prompt = "Analyze: ...".to_string();
1031        step.raw_response = "The answer involves philosophical concepts".to_string();
1032        step.complete(
1033            StepOutput::Text {
1034                content: "Deep philosophical analysis complete".to_string(),
1035            },
1036            0.9,
1037        );
1038
1039        let mut trace = trace;
1040        trace.add_step(step);
1041
1042        let json = trace.to_json().expect("Should handle unicode");
1043        let deserialized: ExecutionTrace =
1044            serde_json::from_str(&json).expect("Should deserialize unicode");
1045
1046        assert!(deserialized.input["query"].as_str().unwrap().contains(""));
1047    }
1048
1049    #[test]
1050    fn test_very_long_response() {
1051        let mut step = StepTrace::new("long_response", 0);
1052
1053        // Create a very long response (100KB)
1054        let long_response: String = "x".repeat(100_000);
1055        step.raw_response = long_response.clone();
1056
1057        step.complete(
1058            StepOutput::Text {
1059                content: long_response.clone(),
1060            },
1061            0.9,
1062        );
1063
1064        let mut trace = ExecutionTrace::new("test", "1.0.0");
1065        trace.add_step(step);
1066
1067        let json = trace.to_json().expect("Should handle long strings");
1068        assert!(json.len() > 100_000);
1069
1070        let deserialized: ExecutionTrace = serde_json::from_str(&json).expect("Should deserialize");
1071        assert_eq!(deserialized.steps[0].raw_response.len(), 100_000);
1072    }
1073
1074    #[test]
1075    fn test_many_steps() {
1076        let mut trace = ExecutionTrace::new("stress_test", "1.0.0");
1077
1078        // Add 1000 steps
1079        for i in 0..1000 {
1080            let mut step = StepTrace::new(format!("step_{}", i), i);
1081            step.tokens = TokenUsage::new(10, 5, 0.0001);
1082            step.complete(StepOutput::Empty, 0.9);
1083            trace.add_step(step);
1084        }
1085
1086        assert_eq!(trace.steps.len(), 1000);
1087        assert_eq!(trace.completed_steps(), 1000);
1088        assert_eq!(trace.tokens.total_tokens, 15_000);
1089
1090        // Should still serialize efficiently
1091        let json = trace
1092            .to_json_compact()
1093            .expect("Should serialize many steps");
1094        let deserialized: ExecutionTrace = serde_json::from_str(&json).expect("Should deserialize");
1095        assert_eq!(deserialized.steps.len(), 1000);
1096    }
1097
1098    #[test]
1099    fn test_zero_confidence_steps() {
1100        let mut trace = ExecutionTrace::new("test", "1.0.0");
1101
1102        for i in 0..3 {
1103            let mut step = StepTrace::new(format!("step_{}", i), i);
1104            step.confidence = 0.0;
1105            step.status = StepStatus::Completed;
1106            trace.add_step(step);
1107        }
1108
1109        assert_eq!(trace.average_confidence(), 0.0);
1110    }
1111
1112    #[test]
1113    fn test_maximum_confidence() {
1114        let mut trace = ExecutionTrace::new("test", "1.0.0");
1115
1116        let mut step = StepTrace::new("perfect", 0);
1117        step.confidence = 1.0;
1118        step.status = StepStatus::Completed;
1119        trace.add_step(step);
1120
1121        assert_eq!(trace.average_confidence(), 1.0);
1122    }
1123
1124    // =========================================================================
1125    // SECTION 12: Clone and Debug Trait Tests
1126    // =========================================================================
1127
1128    #[test]
1129    fn test_execution_trace_clone() {
1130        let mut original = ExecutionTrace::new("test", "1.0.0");
1131        original.metadata.model = Some("gpt-4".to_string());
1132
1133        let mut step = StepTrace::new("step1", 0);
1134        step.complete(StepOutput::Empty, 0.9);
1135        original.add_step(step);
1136
1137        let cloned = original.clone();
1138
1139        // Clones should be equal
1140        assert_eq!(cloned.id, original.id);
1141        assert_eq!(cloned.protocol_id, original.protocol_id);
1142        assert_eq!(cloned.steps.len(), original.steps.len());
1143        assert_eq!(cloned.metadata.model, original.metadata.model);
1144    }
1145
1146    #[test]
1147    fn test_step_trace_clone() {
1148        let mut original = StepTrace::new("step1", 0);
1149        original.prompt = "Test prompt".to_string();
1150        original.tokens = TokenUsage::new(100, 50, 0.001);
1151        original.complete(
1152            StepOutput::Text {
1153                content: "Result".to_string(),
1154            },
1155            0.9,
1156        );
1157
1158        let cloned = original.clone();
1159
1160        assert_eq!(cloned.step_id, original.step_id);
1161        assert_eq!(cloned.prompt, original.prompt);
1162        assert_eq!(cloned.confidence, original.confidence);
1163        assert_eq!(cloned.tokens.total_tokens, original.tokens.total_tokens);
1164    }
1165
1166    #[test]
1167    fn test_debug_formatting() {
1168        let trace = ExecutionTrace::new("debug_test", "1.0.0");
1169
1170        // Debug should not panic and should contain key info
1171        let debug_str = format!("{:?}", trace);
1172
1173        assert!(debug_str.contains("ExecutionTrace"));
1174        assert!(debug_str.contains("debug_test"));
1175    }
1176
1177    // =========================================================================
1178    // SECTION 13: Completed Steps Counter Tests
1179    // =========================================================================
1180
1181    #[test]
1182    fn test_completed_steps_empty() {
1183        let trace = ExecutionTrace::new("test", "1.0.0");
1184        assert_eq!(trace.completed_steps(), 0);
1185    }
1186
1187    #[test]
1188    fn test_completed_steps_all_completed() {
1189        let mut trace = ExecutionTrace::new("test", "1.0.0");
1190
1191        for i in 0..5 {
1192            let mut step = StepTrace::new(format!("step_{}", i), i);
1193            step.complete(StepOutput::Empty, 0.9);
1194            trace.add_step(step);
1195        }
1196
1197        assert_eq!(trace.completed_steps(), 5);
1198    }
1199
1200    #[test]
1201    fn test_completed_steps_none_completed() {
1202        let mut trace = ExecutionTrace::new("test", "1.0.0");
1203
1204        for i in 0..3 {
1205            let step = StepTrace::new(format!("step_{}", i), i);
1206            // Status remains Pending
1207            trace.add_step(step);
1208        }
1209
1210        assert_eq!(trace.completed_steps(), 0);
1211    }
1212
1213    #[test]
1214    fn test_completed_steps_mixed() {
1215        let mut trace = ExecutionTrace::new("test", "1.0.0");
1216
1217        // 2 completed
1218        for i in 0..2 {
1219            let mut step = StepTrace::new(format!("completed_{}", i), i);
1220            step.complete(StepOutput::Empty, 0.9);
1221            trace.add_step(step);
1222        }
1223
1224        // 1 failed
1225        let mut failed = StepTrace::new("failed", 2);
1226        failed.fail("Error");
1227        trace.add_step(failed);
1228
1229        // 1 pending
1230        let pending = StepTrace::new("pending", 3);
1231        trace.add_step(pending);
1232
1233        // 1 running
1234        let mut running = StepTrace::new("running", 4);
1235        running.start();
1236        trace.add_step(running);
1237
1238        assert_eq!(trace.steps.len(), 5);
1239        assert_eq!(trace.completed_steps(), 2);
1240    }
1241}
reasonkit/thinktool/trace.rs

reasonkit/thinktool/
trace.rs