paladin-ai 0.5.0

Enterprise AI orchestration framework with multi-agent coordination patterns
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
//! PlanningService - LLM-based autonomous task decomposition
//!
//! This service implements US-14.1: Autonomous Planning Mode.
//! When a Paladin is configured with `MaxLoops::Auto`, it uses this service
//! to decompose complex tasks into subtasks, execute them with dependency tracking,
//! and synthesize results into a cohesive response.
//!
//! # Examples
//!
//! ```rust,no_run
//! use paladin::application::services::paladin::planning_service::PlanningService;
//! use paladin_ports::output::llm_port::LlmPort;
//! use std::sync::Arc;
//!
//! # async fn example(llm_port: Arc<dyn LlmPort>) -> Result<(), Box<dyn std::error::Error>> {
//! let planning_service = PlanningService::new(llm_port);
//!
//! // Create and execute a plan
//! let plan = planning_service.create_plan(
//!     "Analyze the security vulnerabilities in this codebase",
//!     10, // max_subtasks
//!     "gpt-4o", // model
//! ).await?;
//!
//! let result = planning_service.execute_subtasks(&plan, "/* code here */", "gpt-4o").await?;
//! # Ok(())
//! # }
//! ```

use crate::application::errors::planning_error::PlanningError;
use crate::core::platform::container::planning::{Subtask, TaskPlan};
use crate::core::platform::container::prompt::{PromptItem, PromptType, UserPrompt};
use log::info;
use paladin_ports::output::llm_port::{LlmPort, LlmRequest};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;

/// Service for LLM-based autonomous task planning and execution
///
/// Implements the planning mode where a Paladin decomposes complex tasks
/// into subtasks, manages their execution with dependency tracking, and
/// synthesizes results.
///
/// The three stages are exposed as separate methods: `create_plan`
/// (decomposition), `execute_subtasks` (dependency-ordered execution) and
/// `synthesize_results` (final summary). Each stage issues its requests
/// through the stored LLM port.
pub struct PlanningService {
    /// LLM port for task decomposition and synthesis
    ///
    /// Held behind an `Arc` as a trait object, so any `LlmPort`
    /// implementation can be supplied by the caller.
    llm_port: Arc<dyn LlmPort>,
}

/// Internal structure for deserializing LLM plan responses
///
/// Mirrors the top-level JSON object requested by the planning prompt:
/// a `task` string and a `subtasks` array.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LlmPlanResponse {
    /// Task description as echoed back by the LLM; used as the plan's task
    /// when the `TaskPlan` is constructed.
    task: String,
    /// Subtasks in the order the LLM listed them.
    subtasks: Vec<LlmSubtask>,
}

/// Internal structure for deserializing subtasks from LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LlmSubtask {
    id: String,
    description: String,
    dependencies: Vec<String>,
}

impl PlanningService {
    /// Creates a new PlanningService
    ///
    /// # Arguments
    ///
    /// * `llm_port` - LLM port for generating plans and synthesizing results
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use paladin::application::services::paladin::planning_service::PlanningService;
    /// use paladin_ports::output::llm_port::LlmPort;
    /// use std::sync::Arc;
    ///
    /// # fn example(llm_port: Arc<dyn LlmPort>) {
    /// let service = PlanningService::new(llm_port);
    /// # }
    /// ```
    pub fn new(llm_port: Arc<dyn LlmPort>) -> Self {
        info!("Creating PlanningService");
        Self { llm_port }
    }

    /// Asks the LLM to decompose a task and returns the validated plan.
    ///
    /// The planning prompt is sent as a single, non-streaming user prompt
    /// with no attachments; the reply is then parsed into a `TaskPlan`.
    ///
    /// # Arguments
    ///
    /// * `task_description` - Description of the task to decompose
    /// * `max_subtasks` - Maximum number of subtasks allowed
    /// * `model` - LLM model to use for planning (e.g., "gpt-4", "claude-3")
    ///
    /// # Returns
    ///
    /// A `TaskPlan` containing the decomposed subtasks
    ///
    /// # Errors
    ///
    /// Returns `PlanningError` if:
    /// - The prompt item cannot be constructed (`GenerationFailed`)
    /// - The LLM call fails (`LlmError`)
    /// - The response cannot be parsed
    /// - The plan exceeds the `max_subtasks` limit
    /// - The plan fails validation
    pub async fn create_plan(
        &self,
        task_description: &str,
        max_subtasks: u32,
        model: &str,
    ) -> Result<TaskPlan, PlanningError> {
        info!(
            "Creating plan for task: '{}' (max {} subtasks)",
            task_description, max_subtasks
        );

        // Wrap the planning instructions in a plain user prompt.
        let planning_query = UserPrompt {
            query: self.build_planning_prompt(task_description, max_subtasks),
            context: None,
        };
        let prompt = match PromptItem::new(PromptType::User(planning_query)) {
            Ok(item) => item,
            Err(e) => return Err(PlanningError::GenerationFailed(e.to_string())),
        };

        // One-shot request: no attachments, no streaming, no metadata.
        let llm_reply = self
            .llm_port
            .generate(LlmRequest {
                id: Uuid::new_v4(),
                model: model.to_string(),
                prompt,
                attachments: vec![],
                stream: false,
                metadata: HashMap::new(),
            })
            .await
            .map_err(|e| PlanningError::LlmError(e.to_string()))?;

        // Turn the raw reply into a validated plan.
        let plan = self.parse_plan_from_llm(&llm_reply.content, max_subtasks)?;
        info!("Created plan with {} subtasks", plan.subtask_count());

        Ok(plan)
    }

    /// Executes all subtasks in dependency order
    ///
    /// Works on a clone of `plan`; the input plan is never mutated. On each
    /// pass the first not-yet-completed subtask whose dependencies are all
    /// completed is executed via the LLM and marked complete. Subtasks that
    /// are already completed in the incoming plan are not re-executed and
    /// count as satisfied dependencies, so a partially executed plan can be
    /// resumed.
    ///
    /// # Arguments
    ///
    /// * `plan` - The task plan with subtasks to execute
    /// * `original_input` - The original task input/context
    /// * `model` - LLM model to use for executing subtasks (e.g., "gpt-4", "claude-3")
    ///
    /// # Returns
    ///
    /// A `TaskPlan` with all subtasks executed and results populated
    ///
    /// # Errors
    ///
    /// Returns `PlanningError` if:
    /// - LLM call fails for any subtask
    /// - Subtask execution fails
    /// - No remaining subtask can be started (circular dependencies, or a
    ///   dependency on an id that never completes)
    pub async fn execute_subtasks(
        &self,
        plan: &TaskPlan,
        original_input: &str,
        model: &str,
    ) -> Result<TaskPlan, PlanningError> {
        use std::collections::HashSet;

        info!(
            "Executing {} subtasks for task: '{}'",
            plan.subtasks.len(),
            plan.original_task
        );

        let mut executed_plan = plan.clone();
        let total = executed_plan.subtasks.len();

        // Seed the bookkeeping with subtasks that arrive already completed.
        // Without this they are skipped by the scan below but never counted,
        // which made the loop end with a bogus "circular dependencies" error.
        let mut completed_ids: HashSet<String> = executed_plan
            .subtasks
            .iter()
            .filter(|subtask| subtask.completed)
            .map(|subtask| subtask.id.clone())
            .collect();
        // Counted separately from the set so the loop bound is per subtask,
        // not per distinct id.
        let mut completed_count = executed_plan
            .subtasks
            .iter()
            .filter(|subtask| subtask.completed)
            .count();

        while completed_count < total {
            // First pending subtask (in plan order) whose dependencies are all done.
            let next_ready = executed_plan
                .subtasks
                .iter()
                .enumerate()
                .filter(|(_, subtask)| !subtask.completed)
                .find_map(|(idx, subtask)| {
                    let dependencies = executed_plan
                        .dependencies
                        .get(&subtask.id)
                        .cloned()
                        .unwrap_or_default();
                    let ready = dependencies
                        .iter()
                        .all(|dep_id| completed_ids.contains(dep_id));
                    ready.then_some((idx, dependencies))
                });

            // Work remains but nothing is runnable: the graph cannot be resolved.
            let Some((idx, dependencies)) = next_ready else {
                return Err(PlanningError::InvalidPlan(
                    "Circular dependencies or invalid dependency graph detected".to_string(),
                ));
            };

            let subtask_id = executed_plan.subtasks[idx].id.clone();
            info!(
                "Executing subtask: {} - {}",
                subtask_id, executed_plan.subtasks[idx].description
            );

            // Build context from completed dependencies
            let context =
                self.build_subtask_context(&executed_plan, &dependencies, original_input);

            // The LLM call only needs a shared borrow; mutation happens afterwards.
            let result = self
                .execute_subtask(&executed_plan.subtasks[idx], &context, model)
                .await?;

            executed_plan.subtasks[idx].complete(result);
            info!("Completed subtask: {}", subtask_id);

            completed_ids.insert(subtask_id);
            completed_count += 1;
        }

        info!("All {} subtasks completed", completed_count);
        Ok(executed_plan)
    }

    /// Combines the results of a fully executed plan into one response.
    ///
    /// Refuses to run while any subtask is still incomplete. Otherwise a
    /// synthesis prompt is built from the subtask results and sent to the
    /// LLM with a temperature of 0.7; the reply text is returned verbatim.
    ///
    /// # Arguments
    ///
    /// * `plan` - The completed task plan with subtask results
    /// * `original_task` - The original task description
    /// * `model` - LLM model to use for synthesis (e.g., "gpt-4", "claude-3")
    ///
    /// # Returns
    ///
    /// A cohesive response synthesizing all subtask results
    ///
    /// # Errors
    ///
    /// Returns `PlanningError` if:
    /// - The plan has incomplete subtasks (`InvalidPlan`)
    /// - The prompt item cannot be constructed (`GenerationFailed`)
    /// - The LLM call fails (`LlmError`)
    pub async fn synthesize_results(
        &self,
        plan: &TaskPlan,
        original_task: &str,
        model: &str,
    ) -> Result<String, PlanningError> {
        use crate::core::platform::container::prompt::PromptParameters;

        info!("Synthesizing results for task: '{}'", original_task);

        // Guard: every subtask must have finished before we summarise.
        let pending = plan.subtasks.iter().filter(|st| !st.completed).count();
        if pending > 0 {
            return Err(PlanningError::InvalidPlan(format!(
                "Cannot synthesize results: {} subtasks incomplete",
                pending
            )));
        }

        let synthesis_query = UserPrompt {
            query: self.build_synthesis_prompt(plan, original_task),
            context: None,
        };
        let mut prompt_item = PromptItem::new(PromptType::User(synthesis_query))
            .map_err(|e| PlanningError::GenerationFailed(e.to_string()))?;

        // A higher temperature is used here for more natural-sounding prose.
        let sampling = PromptParameters {
            max_tokens: None,
            temperature: Some(0.7),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop_sequences: None,
        };
        prompt_item.set_parameters(sampling);

        let llm_reply = self
            .llm_port
            .generate(LlmRequest {
                id: Uuid::new_v4(),
                model: model.to_string(),
                prompt: prompt_item,
                attachments: vec![],
                stream: false,
                metadata: HashMap::new(),
            })
            .await
            .map_err(|e| PlanningError::LlmError(e.to_string()))?;

        info!("Synthesis complete");
        Ok(llm_reply.content)
    }

    /// Renders the prompt that asks the LLM to merge all subtask results.
    ///
    /// Each subtask that has a result contributes a numbered entry with its
    /// description and result. Numbering follows the subtask's position in
    /// the plan, so a subtask without a result leaves a gap in the numbers.
    fn build_synthesis_prompt(&self, plan: &TaskPlan, original_task: &str) -> String {
        let subtask_results: String = plan
            .subtasks
            .iter()
            .enumerate()
            .filter_map(|(position, subtask)| {
                subtask.result.as_ref().map(|outcome| {
                    format!(
                        "{}. {}\n   Result: {}\n\n",
                        position + 1,
                        subtask.description,
                        outcome
                    )
                })
            })
            .collect();

        format!(
            r#"You are synthesizing the results of multiple subtasks into a cohesive response.

ORIGINAL TASK: {}

COMPLETED SUBTASKS AND RESULTS:
{}

Synthesize these results into a clear, comprehensive response that directly addresses the original task. Provide a cohesive summary that:
1. Integrates information from all subtasks
2. Presents results in a logical flow
3. Highlights key findings or accomplishments
4. Provides clear next steps or conclusions if applicable

Write the synthesized response now:"#,
            original_task, subtask_results
        )
    }

    /// Builds the planning prompt for the LLM
    ///
    /// The prompt embeds `task_description` and the `max_subtasks` limit and
    /// instructs the model to answer with JSON only, in the shape
    /// `{ "task": ..., "subtasks": [{ "id", "description", "dependencies" }] }`.
    /// That shape is what `LlmPlanResponse` / `LlmSubtask` deserialize.
    ///
    /// Doubled braces (`{{` / `}}`) in the template are `format!` escapes and
    /// render as single literal braces in the final prompt.
    fn build_planning_prompt(&self, task_description: &str, max_subtasks: u32) -> String {
        format!(
            r#"You are a task planning assistant. Decompose the following task into subtasks.

TASK: {}

INSTRUCTIONS:
- Break down the task into {} or fewer subtasks
- Each subtask should be concrete and actionable
- Identify dependencies between subtasks
- Return your response as JSON in the following format:

{{
  "task": "original task description",
  "subtasks": [
    {{
      "id": "1",
      "description": "description of subtask",
      "dependencies": ["id1", "id2"]
    }}
  ]
}}

Return ONLY the JSON, no additional text."#,
            task_description, max_subtasks
        )
    }

    /// Parses LLM response into a TaskPlan
    ///
    /// # Arguments
    ///
    /// * `llm_response` - The LLM's response content
    /// * `max_subtasks` - Maximum allowed subtasks
    ///
    /// # Returns
    ///
    /// A validated `TaskPlan`
    ///
    /// # Errors
    ///
    /// Returns `PlanningError` if parsing fails or plan is invalid
    fn parse_plan_from_llm(
        &self,
        llm_response: &str,
        max_subtasks: u32,
    ) -> Result<TaskPlan, PlanningError> {
        // Try to extract JSON from the response (LLM might add extra text)
        let json_str = self.extract_json(llm_response)?;

        // Parse JSON
        let llm_plan: LlmPlanResponse = serde_json::from_str(&json_str)
            .map_err(|e| PlanningError::GenerationFailed(format!("JSON parse error: {}", e)))?;

        // Validate subtask count
        if llm_plan.subtasks.len() as u32 > max_subtasks {
            return Err(PlanningError::MaxSubtasksExceeded {
                max: max_subtasks,
                attempted: llm_plan.subtasks.len() as u32,
            });
        }

        // Create TaskPlan
        let mut plan = TaskPlan::new(llm_plan.task, max_subtasks);

        // Add subtasks
        for llm_subtask in llm_plan.subtasks {
            let subtask = Subtask::new(
                llm_subtask.id.clone(),
                llm_subtask.description,
                "Expected output from subtask execution".to_string(), // TODO: Ask LLM for expected output
            );
            plan.add_subtask(subtask)
                .map_err(PlanningError::InvalidPlan)?;

            // Add dependencies if any
            if !llm_subtask.dependencies.is_empty() {
                plan.dependencies
                    .insert(llm_subtask.id, llm_subtask.dependencies);
            }
        }

        // Validate the plan (checks for circular dependencies, etc.)
        plan.validate().map_err(PlanningError::InvalidPlan)?;

        Ok(plan)
    }

    /// Extracts JSON from LLM response (handles markdown code blocks, etc.)
    fn extract_json(&self, response: &str) -> Result<String, PlanningError> {
        let trimmed = response.trim();

        // Check for markdown code block
        if let Some(start) = trimmed.find("```json")
            && let Some(end) = trimmed[start + 7..].find("```")
        {
            return Ok(trimmed[start + 7..start + 7 + end].trim().to_string());
        }

        // Check for plain code block
        if let Some(start) = trimmed.find("```")
            && let Some(end) = trimmed[start + 3..].find("```")
        {
            return Ok(trimmed[start + 3..start + 3 + end].trim().to_string());
        }

        // Assume the whole response is JSON
        Ok(trimmed.to_string())
    }

    /// Assembles the context string handed to a subtask before execution.
    ///
    /// With no dependencies the original input is returned unchanged.
    /// Otherwise the context starts with the original input and then lists,
    /// for every dependency id found in the plan, that subtask's id,
    /// description and result (`"No result"` when it has none). Dependency
    /// ids that match no subtask are skipped silently.
    fn build_subtask_context(
        &self,
        plan: &TaskPlan,
        dependencies: &[String],
        original_input: &str,
    ) -> String {
        if dependencies.is_empty() {
            return original_input.to_string();
        }

        let mut context = format!("Original Task: {}\n\n", original_input);
        context.push_str("Results from prerequisite subtasks:\n\n");

        // Resolve each dependency id to its subtask, keeping the given order.
        let prerequisites = dependencies
            .iter()
            .filter_map(|dep_id| plan.subtasks.iter().find(|st| st.id == *dep_id));

        for prerequisite in prerequisites {
            let outcome = prerequisite.result.as_deref().unwrap_or("No result");
            context.push_str(&format!(
                "Subtask {}: {}\nResult: {}\n\n",
                prerequisite.id, prerequisite.description, outcome
            ));
        }

        context
    }

    /// Runs one subtask through the LLM and returns the reply text.
    ///
    /// The prompt contains the subtask description, its expected output and
    /// the supplied context. The request is non-streaming and uses a
    /// temperature of 0.3.
    ///
    /// # Arguments
    ///
    /// * `subtask` - The subtask to execute
    /// * `context` - Contextual information including dependencies
    /// * `model` - LLM model to use (e.g., "gpt-4", "claude-3")
    ///
    /// # Errors
    ///
    /// Returns `PlanningError::GenerationFailed` when the prompt item cannot
    /// be constructed and `PlanningError::LlmError` when the LLM call fails.
    async fn execute_subtask(
        &self,
        subtask: &Subtask,
        context: &str,
        model: &str,
    ) -> Result<String, PlanningError> {
        use crate::core::platform::container::prompt::PromptParameters;

        let instructions = format!(
            r#"You are executing a subtask as part of a larger plan.

SUBTASK: {}

EXPECTED OUTPUT: {}

CONTEXT:
{}

Execute this subtask and provide the result. Be concise and focused on the expected output."#,
            subtask.description, subtask.expected_output, context
        );

        let mut prompt_item = PromptItem::new(PromptType::User(UserPrompt {
            query: instructions,
            context: None,
        }))
        .map_err(|e| PlanningError::GenerationFailed(e.to_string()))?;

        // A lower temperature is used here to keep execution focused.
        let sampling = PromptParameters {
            max_tokens: None,
            temperature: Some(0.3),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop_sequences: None,
        };
        prompt_item.set_parameters(sampling);

        let llm_reply = self
            .llm_port
            .generate(LlmRequest {
                id: Uuid::new_v4(),
                model: model.to_string(),
                prompt: prompt_item,
                attachments: vec![],
                stream: false,
                metadata: HashMap::new(),
            })
            .await
            .map_err(|e| PlanningError::LlmError(e.to_string()))?;

        Ok(llm_reply.content)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use chrono::Utc;
    use paladin_ports::output::llm_port::{
        FinishReason, LlmError, LlmResponse, ProviderCapabilities, TokenUsage,
    };

    /// Test double for an LLM port that always answers with one canned string.
    struct MockLlmPort {
        /// Text returned as the content of every generated response.
        response: String,
    }

    impl MockLlmPort {
        /// Creates a mock that replies with `response` to every request.
        fn new(response: impl Into<String>) -> Self {
            let response = response.into();
            MockLlmPort { response }
        }
    }

    #[async_trait]
    impl LlmPort for MockLlmPort {
        /// Ignores the request and returns the canned response text with
        /// fixed token usage and a `Stop` finish reason.
        async fn generate(&self, _request: LlmRequest) -> Result<LlmResponse, LlmError> {
            let usage = TokenUsage {
                prompt_tokens: 10,
                completion_tokens: 20,
                total_tokens: 30,
            };

            let reply = LlmResponse {
                id: Uuid::new_v4(),
                request_id: Uuid::new_v4(),
                model: String::from("test-model"),
                content: self.response.clone(),
                finish_reason: FinishReason::Stop,
                usage,
                created_at: Utc::now(),
                metadata: HashMap::new(),
                function_call: None,
            };

            Ok(reply)
        }

        /// Streaming is not exercised by these tests; calling this panics.
        async fn generate_stream(
            &self,
            _request: LlmRequest,
        ) -> Result<
            Box<
                dyn futures::Stream<
                        Item = Result<paladin_ports::output::llm_port::StreamingResponse, LlmError>,
                    > + Send,
            >,
            LlmError,
        > {
            unimplemented!("Streaming not needed for tests")
        }

        /// Accepts every model name.
        async fn validate_model(&self, _model: &str) -> Result<bool, LlmError> {
            Ok(true)
        }

        /// Reports a single model, `test-model`.
        async fn get_available_models(&self) -> Result<Vec<String>, LlmError> {
            let models = vec![String::from("test-model")];
            Ok(models)
        }

        /// Provider name reported by the mock.
        fn get_provider_name(&self) -> &'static str {
            "mock"
        }

        /// Capabilities of the mock: only system messages are supported,
        /// with a 4096-token context limit.
        fn get_capabilities(&self) -> ProviderCapabilities {
            ProviderCapabilities {
                supports_streaming: false,
                supports_function_calling: false,
                supports_tool_calling: false,
                supports_vision: false,
                supports_embeddings: false,
                supports_system_messages: true,
                max_context_tokens: Some(4096),
            }
        }
    }

    /// Constructing the service must retain a handle to the supplied port.
    #[test]
    fn test_planning_service_new() {
        // Given: a shared mock port
        let llm_port = Arc::new(MockLlmPort::new("test"));

        // When: the service is built from a clone of that handle
        let port_handle = llm_port.clone();
        let _service = PlanningService::new(port_handle);

        // Then: both the test and the service hold a strong reference
        let holders = Arc::strong_count(&llm_port);
        assert!(holders >= 2);
    }

    /// A well-formed three-subtask reply is turned into a three-subtask plan.
    #[tokio::test]
    async fn test_create_plan_basic() {
        // Given: the mock LLM replies with a valid plan
        let plan_json = r#"{
            "task": "Analyze security vulnerabilities",
            "subtasks": [
                {
                    "id": "1",
                    "description": "Scan for SQL injection vulnerabilities",
                    "dependencies": []
                },
                {
                    "id": "2",
                    "description": "Check for XSS vulnerabilities",
                    "dependencies": []
                },
                {
                    "id": "3",
                    "description": "Generate security report",
                    "dependencies": ["1", "2"]
                }
            ]
        }"#;
        let mock_port = Arc::new(MockLlmPort::new(plan_json));
        let service = PlanningService::new(mock_port);

        // When: a plan is requested
        let outcome = service
            .create_plan("Analyze security vulnerabilities", 10, "gpt-4")
            .await;

        // Then: planning succeeds and all three subtasks are present
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().subtask_count(), 3);
    }

    /// A reply with more subtasks than allowed is rejected with the exact counts.
    #[tokio::test]
    async fn test_create_plan_enforces_max_subtasks() {
        // Given: the mock LLM replies with six subtasks
        let plan_json = r#"{
            "task": "Complex task",
            "subtasks": [
                {"id": "1", "description": "Task 1", "dependencies": []},
                {"id": "2", "description": "Task 2", "dependencies": []},
                {"id": "3", "description": "Task 3", "dependencies": []},
                {"id": "4", "description": "Task 4", "dependencies": []},
                {"id": "5", "description": "Task 5", "dependencies": []},
                {"id": "6", "description": "Task 6", "dependencies": []}
            ]
        }"#;
        let mock_port = Arc::new(MockLlmPort::new(plan_json));
        let service = PlanningService::new(mock_port);

        // When: only three subtasks are allowed
        let result = service.create_plan("Complex task", 3, "gpt-4").await;

        // Then: the limit violation is reported with both numbers
        assert!(result.is_err());
        match result.err() {
            Some(PlanningError::MaxSubtasksExceeded { max, attempted }) => {
                assert_eq!(max, 3);
                assert_eq!(attempted, 6);
            }
            Some(other) => panic!("Expected MaxSubtasksExceeded, got: {:?}", other),
            None => unreachable!("is_err() was asserted above"),
        }
    }

    /// A linear dependency chain (1 -> 2 -> 3) is executed to completion.
    ///
    /// The mock returns the same text for every request, so this checks
    /// completion and result population rather than the content of results.
    #[tokio::test]
    async fn test_execute_subtasks_with_dependencies() {
        // Given: a plan whose subtasks depend on each other in sequence
        let plan_json = r#"{
            "task": "Build and test application",
            "subtasks": [
                {
                    "id": "1",
                    "description": "Install dependencies",
                    "dependencies": []
                },
                {
                    "id": "2",
                    "description": "Build application",
                    "dependencies": ["1"]
                },
                {
                    "id": "3",
                    "description": "Run tests",
                    "dependencies": ["2"]
                }
            ]
        }"#;
        let llm_port = Arc::new(MockLlmPort::new(plan_json));
        let service = PlanningService::new(llm_port.clone());

        // When: the plan is created and then executed
        let plan = service
            .create_plan("Build and test application", 10, "gpt-4")
            .await
            .expect("Failed to create plan");
        let outcome = service
            .execute_subtasks(&plan, "Build and test application", "gpt-4")
            .await;

        // Then: execution succeeds and keeps all three subtasks
        assert!(outcome.is_ok());
        let executed_plan = outcome.unwrap();
        assert_eq!(executed_plan.subtasks.len(), 3);

        // Every subtask is flagged completed and carries a result
        for subtask in executed_plan.subtasks.iter() {
            assert!(
                subtask.completed,
                "Subtask {} should be completed",
                subtask.id
            );
            assert!(
                subtask.result.is_some(),
                "Subtask {} should have a result",
                subtask.id
            );
        }
    }

    /// Synthesis over a fully completed plan returns the LLM's reply text.
    #[tokio::test]
    async fn test_synthesize_results() {
        // Helper: build a subtask and immediately mark it completed.
        let completed = |id: &str, description: &str, expected: &str, outcome: &str| {
            let mut subtask = Subtask::new(
                id.to_string(),
                description.to_string(),
                expected.to_string(),
            );
            subtask.complete(outcome.to_string());
            subtask
        };

        // Given: a plan whose three subtasks all carry results
        let finished_subtasks = [
            completed(
                "1",
                "Install dependencies",
                "Dependencies installed",
                "Successfully installed all dependencies: express, react, typescript",
            ),
            completed(
                "2",
                "Build application",
                "Build output",
                "Build completed successfully. Output: dist/bundle.js (245 KB)",
            ),
            completed(
                "3",
                "Run tests",
                "Test results",
                "All tests passed: 42 passed, 0 failed",
            ),
        ];

        let mut plan = TaskPlan::new("Build and deploy application".to_string(), 10);
        for subtask in finished_subtasks {
            plan.add_subtask(subtask).unwrap();
        }

        // And: a mock LLM that answers with a ready-made summary
        let synthesis_response = r#"Successfully built and tested the application:
1. Installed all required dependencies (express, react, typescript)
2. Built the application successfully (output: dist/bundle.js, 245 KB)
3. Verified functionality with complete test suite (42 tests passed)

The application is ready for deployment."#;
        let mock_port = Arc::new(MockLlmPort::new(synthesis_response));
        let service = PlanningService::new(mock_port);

        // When: the results are synthesized
        let outcome = service
            .synthesize_results(&plan, "Build and deploy application", "gpt-4")
            .await;

        // Then: the summary comes back and mentions every stage
        assert!(outcome.is_ok());
        let synthesized = outcome.unwrap();
        assert!(synthesized.contains("dependencies"));
        assert!(synthesized.contains("Built"));
        assert!(synthesized.contains("tests passed"));
        assert!(synthesized.contains("ready for deployment"));
    }

    /// A reply that is not JSON must make `create_plan` fail with
    /// `PlanningError::GenerationFailed`.
    #[tokio::test]
    async fn test_planning_failure_invalid_json() {
        // Given: a mock LLM primed with plain text instead of a JSON plan
        let mock = Arc::new(MockLlmPort::new("This is not valid JSON at all!"));
        let service = PlanningService::new(mock);

        // When: a plan is requested
        let result = service.create_plan("Some task", 10, "gpt-4").await;

        // Then: the call fails, and the failure is the GenerationFailed variant
        assert!(result.is_err());
        match result.err() {
            // `None` cannot occur past the assert above; it is listed only to
            // keep the match exhaustive.
            Some(PlanningError::GenerationFailed(_)) | None => {}
            Some(other) => panic!("Expected GenerationFailed, got: {:?}", other),
        }
    }

    /// `synthesize_results` must reject a plan that still holds a subtask on
    /// which `complete` was never called.
    #[tokio::test]
    async fn test_synthesis_with_incomplete_subtasks() {
        // Given: a single subtask that is deliberately left uncompleted...
        let pending = Subtask::new(
            String::from("1"),
            String::from("Incomplete task"),
            String::from("Output"),
        );
        // ...attached to an otherwise empty plan
        let mut plan = TaskPlan::new(String::from("Test task"), 10);
        plan.add_subtask(pending).unwrap();

        let mock = Arc::new(MockLlmPort::new("Some response"));
        let service = PlanningService::new(mock);

        // When: synthesis is attempted on that plan
        let result = service
            .synthesize_results(&plan, "Test task", "gpt-4")
            .await;

        // Then: an InvalidPlan error whose message mentions "incomplete"
        assert!(result.is_err());
        match result.err() {
            Some(PlanningError::InvalidPlan(msg)) => {
                assert!(msg.contains("incomplete"));
            }
            Some(other) => panic!("Expected InvalidPlan, got: {:?}", other),
            // Not reachable after the assert above; present for exhaustiveness.
            None => {}
        }
    }

    /// Smoke test: creates a one-subtask plan from the mock's JSON reply and
    /// executes it, passing as long as both calls return `Ok`.
    ///
    /// Log output is not captured or asserted here; run with `RUST_LOG=info`
    /// to inspect it manually.
    #[tokio::test]
    async fn test_planning_logs_progress() {
        // Given: a mock reply describing a plan with a single subtask
        let llm_reply = r#"{
            "task": "Simple task",
            "subtasks": [
                {"id": "1", "description": "Do something", "dependencies": []}
            ]
        }"#;
        let mock = Arc::new(MockLlmPort::new(llm_reply));
        let service = PlanningService::new(mock);

        // When: the plan is created...
        let plan = service.create_plan("Simple task", 10, "gpt-4").await;
        assert!(plan.is_ok());
        let plan = plan.unwrap();

        // ...and its subtasks are executed
        let result = service
            .execute_subtasks(&plan, "Simple task", "gpt-4")
            .await;

        // Then: reaching this point without a panic is the whole check; a log
        // capturing test harness would be needed to assert on emitted records.
        assert!(result.is_ok());
    }

    /// Creates a plan while passing an explicit model identifier ("claude-3")
    /// and checks that the resulting plan has exactly one subtask.
    #[tokio::test]
    async fn test_planning_service_uses_configured_model() {
        // Given: a mock reply describing a plan with a single subtask
        let llm_reply = r#"{
            "task": "Test task",
            "subtasks": [
                {"id": "1", "description": "Test subtask", "dependencies": []}
            ]
        }"#;
        let llm_port = Arc::new(MockLlmPort::new(llm_reply));
        // The service receives a clone so this test keeps its own handle to the mock.
        let service = PlanningService::new(llm_port.clone());

        // When: a plan is requested for a specific model
        let result = service.create_plan("Test task", 5, "claude-3").await;

        // Then: creation succeeds and the single subtask came through
        assert!(result.is_ok());
        let created = result.unwrap();
        assert_eq!(created.subtask_count(), 1);

        // Note: the model name itself is not verified here. With model tracking
        // on the mock, this is where one would check
        // llm_port.last_model_used() == "claude-3"
    }

    /// Runs `create_plan` against the same mock with three different model
    /// identifiers and expects every call to succeed.
    #[tokio::test]
    async fn test_planning_service_validates_model_compatibility() {
        // Given: a mock reply describing a valid single-subtask plan
        let llm_reply = r#"{
            "task": "Test task",
            "subtasks": [
                {"id": "1", "description": "Test subtask", "dependencies": []}
            ]
        }"#;
        let mock = Arc::new(MockLlmPort::new(llm_reply));
        let service = PlanningService::new(mock);

        // When: plans are requested one after another under different model names
        let gpt4_result = service.create_plan("Task 1", 5, "gpt-4").await;
        let claude_result = service.create_plan("Task 2", 5, "claude-3").await;
        let custom_result = service.create_plan("Task 3", 5, "custom-model").await;

        // Then: none of them fails at the planning level (per the original
        // author's note, model validation is left to the LlmPort layer)
        assert!(gpt4_result.is_ok());
        assert!(claude_result.is_ok());
        assert!(custom_result.is_ok());
    }

    /// Passes an empty model string to `create_plan` and expects the call to
    /// succeed when backed by this mock.
    #[tokio::test]
    async fn test_planning_service_falls_back_on_invalid_model() {
        // Given: a mock reply describing a valid single-subtask plan.
        // (Per the original author's note, any fallback for a bad model is the
        // responsibility of the real LlmPort implementation.)
        let llm_reply = r#"{
            "task": "Test task",
            "subtasks": [
                {"id": "1", "description": "Test subtask", "dependencies": []}
            ]
        }"#;
        let mock = Arc::new(MockLlmPort::new(llm_reply));
        let service = PlanningService::new(mock);

        // When: the model argument is an empty string, which the service
        // hands on without checking it
        let result = service.create_plan("Test task", 5, "").await;

        // Then: planning itself does not fail; with this mock the call
        // still returns Ok
        assert!(result.is_ok());
    }
}