// nika 0.35.4 - Docs.rs

//! Analyzed task AST.
//!
//! Tasks whose references have been resolved: task references are interned
//! `TaskId`s instead of raw `String`s.

use indexmap::IndexMap;

use super::ids::TaskId;
use crate::ast::artifact::ArtifactSpec;
use crate::ast::decompose::DecomposeSpec;
use crate::ast::logging::LogConfig;
use crate::ast::structured::StructuredOutputSpec;
use crate::binding::WithSpec;
use crate::source::Span;

/// An analyzed task - validated and resolved.
///
/// References to tasks (`id`, `depends_on`, `implicit_deps`) are stored as
/// interned [`TaskId`]s instead of raw strings. Besides `id`, `name`,
/// `action`, `with_spec`, the two dependency lists and `span`, every field
/// is an `Option` and may be absent.
#[derive(Debug, Clone)]
pub struct AnalyzedTask {
    /// Task ID (interned)
    pub id: TaskId,

    /// Task name (for display/debugging)
    pub name: String,

    /// Optional description
    pub description: Option<String>,

    /// The action this task performs (one variant per verb)
    pub action: AnalyzedTaskAction,

    /// Task-specific provider override
    pub provider: Option<String>,

    /// Task-specific model override
    pub model: Option<String>,

    /// Parsed `with:` bindings (alias → WithEntry with source, transforms, defaults)
    ///
    /// Phase 2 parses raw `Spanned<String>` values via `parse_with_entry()`.
    /// Each entry has a `BindingPath` source, optional transforms, defaults, and type.
    pub with_spec: WithSpec,

    /// Explicit ordering dependencies: `depends_on: [task_id1, task_id2]`
    ///
    /// These are pure ordering edges — no data flows through them.
    /// Resolved from raw string task names to interned `TaskId`.
    pub depends_on: Vec<TaskId>,

    /// Implicit dependencies auto-extracted from `with:` bindings.
    ///
    /// When a `WithEntry` source references a task (e.g., `step1.data`),
    /// the analyzer extracts `step1` as an implicit dependency.
    /// These are used by the DAG builder alongside `depends_on`.
    pub implicit_deps: Vec<TaskId>,

    /// Output configuration (format, schema, validation retries)
    pub output: Option<AnalyzedOutput>,

    /// For-each iteration configuration
    pub for_each: Option<AnalyzedForEach>,

    /// Retry configuration
    pub retry: Option<AnalyzedRetry>,

    /// Decompose modifier for runtime DAG expansion
    pub decompose: Option<DecomposeSpec>,

    /// Standalone concurrency (used with decompose when no for_each)
    pub concurrency: Option<u32>,

    /// Standalone fail_fast (used with decompose when no for_each)
    pub fail_fast: Option<bool>,

    /// Artifact configuration for file persistence
    pub artifact: Option<ArtifactSpec>,

    /// Per-task log configuration
    pub log: Option<LogConfig>,

    /// Structured output specification (JSON schema enforcement)
    pub structured: Option<StructuredOutputSpec>,

    /// Source span covering the task
    pub span: Span,
}

/// The action a task performs (analyzed).
///
/// There is one variant per task verb, each wrapping that verb's analyzed
/// payload. `verb_name()` returns the lowercase verb for a given variant.
#[derive(Debug, Clone)]
pub enum AnalyzedTaskAction {
    /// LLM inference (`infer`)
    Infer(AnalyzedInferAction),

    /// Shell command execution (`exec`)
    Exec(AnalyzedExecAction),

    /// HTTP fetch (`fetch`)
    Fetch(AnalyzedFetchAction),

    /// MCP tool invocation (`invoke`)
    Invoke(AnalyzedInvokeAction),

    /// Autonomous agent (`agent`)
    Agent(AnalyzedAgentAction),
}

impl Default for AnalyzedTaskAction {
    fn default() -> Self {
        AnalyzedTaskAction::Infer(AnalyzedInferAction::default())
    }
}

impl AnalyzedTaskAction {
    /// Get the verb name.
    pub fn verb_name(&self) -> &'static str {
        match self {
            AnalyzedTaskAction::Infer(_) => "infer",
            AnalyzedTaskAction::Exec(_) => "exec",
            AnalyzedTaskAction::Fetch(_) => "fetch",
            AnalyzedTaskAction::Invoke(_) => "invoke",
            AnalyzedTaskAction::Agent(_) => "agent",
        }
    }
}

/// Analyzed infer action (payload of `AnalyzedTaskAction::Infer`).
///
/// Only `prompt`, `guardrails` and `span` are always present; every other
/// setting is an `Option`. The derived `Default` yields an empty prompt,
/// no guardrails and `None` for each optional setting.
#[derive(Debug, Clone, Default)]
pub struct AnalyzedInferAction {
    /// The prompt to send to the LLM (may be empty when content is present)
    pub prompt: String,

    /// System prompt override
    pub system: Option<String>,

    /// Temperature (validated: 0.0 - 2.0)
    pub temperature: Option<f64>,

    /// Maximum tokens to generate
    pub max_tokens: Option<u32>,

    /// Enable extended thinking
    pub thinking: Option<bool>,

    /// Thinking budget tokens
    pub thinking_budget: Option<u32>,

    /// Multimodal content parts for vision (analyzed, spans stripped)
    pub content: Option<Vec<crate::ast::content::AnalyzedContentPart>>,

    /// Expected response format: text, json, markdown
    pub response_format: Option<String>,

    /// Guardrails for validating infer output
    pub guardrails: Vec<crate::ast::guardrails::GuardrailConfig>,

    /// Source span covering the action
    pub span: Span,
}

/// Analyzed exec action (payload of `AnalyzedTaskAction::Exec`).
///
/// The derived `Default` yields an empty command, `shell == false`, an empty
/// environment map and no working directory or timeout.
#[derive(Debug, Clone, Default)]
pub struct AnalyzedExecAction {
    /// Command to execute
    pub command: String,

    /// Run through shell
    pub shell: bool,

    /// Working directory
    pub working_dir: Option<String>,

    /// Environment variables (name → value, kept in insertion order by `IndexMap`)
    pub env: IndexMap<String, String>,

    /// Timeout in milliseconds
    pub timeout_ms: Option<u64>,

    /// Source span covering the action
    pub span: Span,
}

/// Analyzed fetch action (payload of `AnalyzedTaskAction::Fetch`).
///
/// The derived `Default` yields an empty URL, the default `HttpMethod`
/// (`Get`), no headers, `follow_redirects == false` and `None` for every
/// optional setting.
#[derive(Debug, Clone, Default)]
pub struct AnalyzedFetchAction {
    /// URL to fetch
    pub url: String,

    /// HTTP method
    pub method: HttpMethod,

    /// HTTP headers (name → value, kept in insertion order by `IndexMap`)
    pub headers: IndexMap<String, String>,

    /// Request body
    pub body: Option<String>,

    /// Request body as JSON
    pub json: Option<serde_json::Value>,

    /// Timeout in milliseconds
    pub timeout_ms: Option<u64>,

    /// Follow redirects
    pub follow_redirects: bool,

    /// Response mode: "full" or "binary"
    pub response: Option<String>,

    /// Extraction mode: markdown, article, text, selector, metadata, links, feed, jsonpath, llm_txt
    pub extract: Option<String>,

    /// CSS selector or JSONPath expression (used with extract)
    pub selector: Option<String>,

    /// Source span covering the action
    pub span: Span,
}

/// HTTP methods.
///
/// `Get` is the default variant. Use [`HttpMethod::parse`] to convert from
/// text and [`HttpMethod::as_str`] to get the canonical uppercase name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HttpMethod {
    /// `GET` (the default)
    #[default]
    Get,
    /// `POST`
    Post,
    /// `PUT`
    Put,
    /// `PATCH`
    Patch,
    /// `DELETE`
    Delete,
    /// `HEAD`
    Head,
    /// `OPTIONS`
    Options,
}

impl HttpMethod {
    /// Every variant, in declaration order.
    ///
    /// `parse` searches this table, so a newly added variant must be listed
    /// here to become parseable.
    const ALL: [Self; 7] = [
        Self::Get,
        Self::Post,
        Self::Put,
        Self::Patch,
        Self::Delete,
        Self::Head,
        Self::Options,
    ];

    /// Parse an HTTP method string.
    ///
    /// Matching is ASCII case-insensitive (`"get"`, `"Get"` and `"GET"` all
    /// yield `Some(HttpMethod::Get)`). Any other input returns `None`,
    /// including the empty string and non-ASCII text that would only equal a
    /// method name after Unicode case mapping (for example `"poſt"`).
    pub fn parse(s: &str) -> Option<Self> {
        // Compare against the canonical names instead of uppercasing `s`:
        // no allocation, and Unicode case folding cannot smuggle in
        // look-alike characters.
        Self::ALL
            .iter()
            .copied()
            .find(|method| s.eq_ignore_ascii_case(method.as_str()))
    }

    /// Get the method as its canonical uppercase string (e.g. `"GET"`).
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Get => "GET",
            Self::Post => "POST",
            Self::Put => "PUT",
            Self::Patch => "PATCH",
            Self::Delete => "DELETE",
            Self::Head => "HEAD",
            Self::Options => "OPTIONS",
        }
    }
}

/// Analyzed invoke action (payload of `AnalyzedTaskAction::Invoke`).
///
/// The derived `Default` yields an empty tool name and `None` for the
/// server, parameters and timeout.
#[derive(Debug, Clone, Default)]
pub struct AnalyzedInvokeAction {
    /// MCP server name (None = first available)
    pub server: Option<String>,

    /// Tool name
    pub tool: String,

    /// Tool parameters (arbitrary JSON value)
    pub params: Option<serde_json::Value>,

    /// Timeout for tool execution, in milliseconds
    pub timeout_ms: Option<u64>,

    /// Source span covering the action
    pub span: Span,
}

/// Analyzed agent action (payload of `AnalyzedTaskAction::Agent`).
///
/// The derived `Default` yields an empty prompt, empty lists for `tools`,
/// `skills`, `mcp` and `stop_sequences`, and `None` for every optional
/// setting.
#[derive(Debug, Clone, Default)]
pub struct AnalyzedAgentAction {
    /// The prompt for the agent
    pub prompt: String,

    /// Available tools
    pub tools: Vec<String>,

    /// Maximum iterations
    pub max_iterations: Option<u32>,

    /// Maximum tokens per response
    pub max_tokens: Option<u32>,

    /// Agent definition reference (resolved)
    pub from: Option<String>,

    /// Skills to inject
    pub skills: Vec<String>,

    /// MCP servers for tool access
    pub mcp: Vec<String>,

    /// System prompt (agent persona)
    pub system: Option<String>,

    /// Temperature for LLM sampling
    pub temperature: Option<f64>,

    /// Token budget for the agent
    pub token_budget: Option<u32>,

    /// Enable extended thinking (Claude)
    pub extended_thinking: Option<bool>,

    /// Thinking budget tokens
    pub thinking_budget: Option<u32>,

    /// Max spawn_agent recursion depth
    pub depth_limit: Option<u32>,

    /// Tool choice behavior: auto, required, none
    pub tool_choice: Option<String>,

    /// Sequences that stop generation (passed to LLM)
    pub stop_sequences: Vec<String>,

    /// Scope preset (full, minimal, debug)
    pub scope: Option<String>,

    /// Source span covering the action
    pub span: Span,
}

/// Analyzed output configuration.
///
/// Describes the format of a task's output and, optionally, a JSON Schema
/// (inline via `schema` or by reference via `schema_ref`) together with a
/// retry limit for validation failures. This type does not derive `Default`.
#[derive(Debug, Clone)]
pub struct AnalyzedOutput {
    /// Output format
    pub format: OutputFormat,

    /// JSON Schema for validation (validated)
    pub schema: Option<serde_json::Value>,

    /// Schema reference: file path or named ref (from `schema_ref:` / `$ref:`)
    pub schema_ref: Option<String>,

    /// Maximum retries on validation failure
    pub max_retries: Option<u32>,

    /// Source span covering the output config
    pub span: Span,
}

/// Format of a task's output: text, JSON or YAML.
///
/// `Text` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OutputFormat {
    #[default]
    Text,
    Json,
    Yaml,
}

impl OutputFormat {
    /// Parse an output format string, ignoring letter case.
    ///
    /// Returns `None` when the lowercased input is not one of `"text"`,
    /// `"json"` or `"yaml"`.
    pub fn parse(s: &str) -> Option<Self> {
        let wanted = s.to_lowercase();
        // Look the name up through `as_str` so both directions share one
        // spelling of each format.
        [Self::Text, Self::Json, Self::Yaml]
            .iter()
            .copied()
            .find(|format| format.as_str() == wanted.as_str())
    }

    /// Get the format as its lowercase name.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Text => "text",
            Self::Json => "json",
            Self::Yaml => "yaml",
        }
    }
}

/// Analyzed for-each iteration configuration.
///
/// `items` is kept as a string: either a binding expression or a serialized
/// array literal. `is_binding()`, `is_array()` and `parse_items()` classify
/// and decode it.
#[derive(Debug, Clone)]
pub struct AnalyzedForEach {
    /// Items expression (binding expression or serialized array)
    pub items: String,

    /// Loop variable name (default: "item")
    pub as_var: String,

    /// Maximum concurrency (None = unlimited); the `Default` impl uses `Some(1)`
    pub parallel: Option<u32>,

    /// Fail fast on first error (default: true)
    pub fail_fast: bool,

    /// Source span covering the for_each config
    pub span: Span,
}

impl Default for AnalyzedForEach {
    /// Builds the default configuration: no items, loop variable `"item"`,
    /// a concurrency of one, fail-fast enabled and a dummy span.
    fn default() -> Self {
        // A limit of one means iterations run sequentially by default.
        let sequential = Some(1);
        Self {
            items: String::new(),
            as_var: String::from("item"),
            parallel: sequential,
            fail_fast: true,
            span: Span::dummy(),
        }
    }
}

impl AnalyzedForEach {
    /// Returns `true` when `items` begins with `{{` or `$`, i.e. it is
    /// written as a binding expression.
    pub fn is_binding(&self) -> bool {
        let expr = self.items.as_str();
        expr.starts_with('$') || expr.starts_with("{{")
    }

    /// Returns `true` when the first character of `items` is `[`, i.e. it
    /// is written as a literal array.
    pub fn is_array(&self) -> bool {
        self.items.chars().next() == Some('[')
    }

    /// Decodes `items` as a JSON array.
    ///
    /// Returns `None` when `items` is not a literal array (see `is_array`)
    /// or when it does not parse as a JSON array.
    pub fn parse_items(&self) -> Option<Vec<serde_json::Value>> {
        if !self.is_array() {
            return None;
        }
        serde_json::from_str(&self.items).ok()
    }
}

/// Analyzed retry configuration.
///
/// `delay_for_attempt()` turns these settings into the wait time before a
/// given attempt. The `Default` impl uses 3 attempts, a 1000 ms delay and no
/// backoff.
#[derive(Debug, Clone)]
pub struct AnalyzedRetry {
    /// Maximum retry attempts (validated: 1-10)
    pub max_attempts: u32,

    /// Delay between retries in milliseconds (validated: 0-60000)
    pub delay_ms: u64,

    /// Exponential backoff multiplier (validated: 1.0-5.0); `None` keeps the delay constant
    pub backoff: Option<f64>,

    /// Source span covering the retry config
    pub span: Span,
}

impl Default for AnalyzedRetry {
    /// Builds the default policy: three attempts, a constant one-second
    /// delay (no backoff multiplier) and a dummy span.
    fn default() -> Self {
        let no_backoff = None;
        Self {
            max_attempts: 3,
            delay_ms: 1_000,
            backoff: no_backoff,
            span: Span::dummy(),
        }
    }
}

impl AnalyzedRetry {
    /// Calculate the delay, in milliseconds, before a given attempt (0-indexed).
    ///
    /// * Attempt `0` is the initial try and never waits.
    /// * Without a backoff multiplier, every later attempt waits `delay_ms`.
    /// * With `Some(multiplier)`, attempt `n >= 1` waits
    ///   `delay_ms * multiplier^(n - 1)`, truncated to whole milliseconds.
    ///   The float-to-integer cast saturates, so a huge product yields
    ///   `u64::MAX` rather than wrapping.
    pub fn delay_for_attempt(&self, attempt: u32) -> u64 {
        if attempt == 0 {
            return 0; // No delay for first attempt
        }
        match self.backoff {
            None => self.delay_ms,
            Some(multiplier) => {
                // `attempt >= 1` here, so the subtraction cannot underflow.
                // Clamp before converting to `i32`: a plain
                // `attempt as i32 - 1` wraps to a negative exponent for
                // attempts above `i32::MAX` (shrinking the delay) and
                // overflows outright when `attempt == 1 << 31`.
                let exponent = (attempt - 1).min(i32::MAX as u32) as i32;
                let factor = multiplier.powi(exponent);
                (self.delay_ms as f64 * factor) as u64
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::FileId;

    /// JSON array literal shared by the `for_each` fixtures.
    const LITERAL_ITEMS: &str = r#"["a", "b", "c"]"#;

    /// Template-style binding expression shared by the `for_each` fixtures.
    const TEMPLATE_ITEMS: &str = "{{with.items}}";

    /// Builds a `Span` from `FileId(0)` and the given start/end values.
    fn make_span(start: u32, end: u32) -> Span {
        let file = FileId(0);
        Span::new(file, start, end)
    }

    /// Default `for_each` configuration with only `items` overridden.
    fn for_each_over(items: &str) -> AnalyzedForEach {
        AnalyzedForEach {
            items: items.to_string(),
            ..Default::default()
        }
    }

    /// Retry policy with a 1000 ms base delay and the given backoff.
    fn retry_with(max_attempts: u32, backoff: Option<f64>) -> AnalyzedRetry {
        AnalyzedRetry {
            max_attempts,
            delay_ms: 1000,
            backoff,
            span: make_span(0, 10),
        }
    }

    #[test]
    fn test_http_method_parse() {
        let cases = [
            ("GET", Some(HttpMethod::Get)),
            ("get", Some(HttpMethod::Get)),
            ("POST", Some(HttpMethod::Post)),
            ("UNKNOWN", None),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(HttpMethod::parse(input), *expected);
        }
    }

    #[test]
    fn test_output_format_parse() {
        let cases = [
            ("text", Some(OutputFormat::Text)),
            ("JSON", Some(OutputFormat::Json)),
            ("yaml", Some(OutputFormat::Yaml)),
            ("unknown", None),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(OutputFormat::parse(input), *expected);
        }
    }

    #[test]
    fn test_analyzed_task_action_verb() {
        let infer_action = AnalyzedTaskAction::Infer(AnalyzedInferAction::default());
        let exec_action = AnalyzedTaskAction::Exec(AnalyzedExecAction::default());

        assert_eq!(infer_action.verb_name(), "infer");
        assert_eq!(exec_action.verb_name(), "exec");
    }

    #[test]
    fn test_analyzed_task_with_spec() {
        use crate::binding::types::{BindingPath, BindingSource, PathSegment};
        use crate::binding::{WithEntry, WithSpec};

        // `data` is bound to `step1.result`.
        let source_path = BindingPath {
            source: BindingSource::Task("step1".into()),
            segments: vec![PathSegment::Field("result".into())],
        };

        let mut spec = WithSpec::default();
        spec.insert("data".to_string(), WithEntry::simple(source_path));

        assert_eq!(spec.len(), 1);
        let bound = spec.get("data").unwrap();
        assert_eq!(bound.task_id(), Some("step1"));
    }

    #[test]
    fn test_analyzed_for_each_default() {
        let AnalyzedForEach {
            as_var,
            parallel,
            fail_fast,
            ..
        } = AnalyzedForEach::default();

        assert_eq!(as_var, "item");
        assert_eq!(parallel, Some(1)); // Sequential by default
        assert!(fail_fast);
    }

    #[test]
    fn test_analyzed_for_each_is_binding() {
        // Both binding syntaxes are recognised...
        assert!(for_each_over(TEMPLATE_ITEMS).is_binding());
        assert!(for_each_over("$items").is_binding());

        // ...while a literal array is not a binding.
        assert!(!for_each_over(LITERAL_ITEMS).is_binding());
    }

    #[test]
    fn test_analyzed_for_each_is_array() {
        assert!(for_each_over(LITERAL_ITEMS).is_array());
        assert!(!for_each_over(TEMPLATE_ITEMS).is_array());
    }

    #[test]
    fn test_analyzed_for_each_parse_items() {
        let parsed = for_each_over(LITERAL_ITEMS).parse_items().unwrap();
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0], serde_json::Value::String("a".to_string()));

        // A binding expression cannot be decoded statically.
        assert!(for_each_over(TEMPLATE_ITEMS).parse_items().is_none());
    }

    #[test]
    fn test_analyzed_retry_default() {
        let defaults = AnalyzedRetry::default();
        assert_eq!((defaults.max_attempts, defaults.delay_ms), (3, 1000));
        assert_eq!(defaults.backoff, None);
    }

    #[test]
    fn test_analyzed_retry_delay_for_attempt() {
        // Without backoff: the first attempt is immediate, retries wait a fixed delay.
        let fixed = retry_with(3, None);
        let fixed_cases: [(u32, u64); 3] = [(0, 0), (1, 1000), (2, 1000)];
        for (attempt, expected_ms) in fixed_cases.iter() {
            assert_eq!(fixed.delay_for_attempt(*attempt), *expected_ms);
        }

        // With exponential backoff: 1000 * 2^(attempt - 1) from the first retry on.
        let doubling = retry_with(5, Some(2.0));
        let doubling_cases: [(u32, u64); 4] = [(0, 0), (1, 1000), (2, 2000), (3, 4000)];
        for (attempt, expected_ms) in doubling_cases.iter() {
            assert_eq!(doubling.delay_for_attempt(*attempt), *expected_ms);
        }
    }
}