ggen-cli-lib 26.7.2

#![allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::needless_raw_string_hashes,
    clippy::duration_suboptimal_units,
    clippy::branches_sharing_code,
    clippy::used_underscore_binding,
    clippy::single_char_pattern,
    clippy::ignore_without_reason,
    clippy::cloned_ref_to_slice_refs,
    clippy::doc_overindented_list_items,
    clippy::match_wildcard_for_single_variants,
    clippy::ignored_unit_patterns,
    clippy::needless_collect,
    clippy::unnecessary_map_or,
    clippy::manual_flatten,
    clippy::manual_strip,
    clippy::future_not_send,
    clippy::unnested_or_patterns,
    clippy::no_effect_underscore_binding,
    clippy::literal_string_with_formatting_args
)]
//! End-to-End LLM Integration Test (Chicago TDD)
//!
//! This test verifies the LLM integration works by:
//! 1. Creating a test project whose `ggen.toml` sets `enable_llm = true`
//! 2. Running ggen sync with real GROQ_API_KEY
//! 3. Verifying generated code has LLM implementations (not TODO stubs)
//! 4. Checking OpenTelemetry traces for real LLM calls
//!
//! CHICAGO TDD: This test uses REAL endpoints only. No mocks.
//! The E2E test is `#[ignore]`d by default; when run, it panics if GROQ_API_KEY is unset or empty.

use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;

/// Helper struct for test project setup.
///
/// Pairs the temporary directory with the path of the project created inside
/// it; instances are built by `TestProject::new_with_llm`.
struct TestProject {
    /// Handle to the temporary directory that contains `project_dir`.
    ///
    /// Never read after construction, hence the `dead_code` allowance.
    /// NOTE(review): presumably retained so the directory is not cleaned up
    /// while the test is still using it — confirm against `tempfile::TempDir`.
    #[allow(dead_code)]
    temp_dir: TempDir,
    /// Root of the generated project: `<temp_dir>/test-llm-project`.
    project_dir: PathBuf,
}

impl TestProject {
    /// Builds a throwaway project on disk with LLM generation switched on.
    ///
    /// Files written under a fresh temporary directory:
    /// - `test-llm-project/ggen.toml` — manifest with `enable_llm = true`, the
    ///   Groq provider/model settings and one inline generation rule that
    ///   targets `src/skills/test_skill.rs`.
    /// - `test-llm-project/.ggen/test.ttl` — ontology declaring a single
    ///   `a2a:Skill` with a system prompt and an implementation hint.
    ///
    /// # Panics
    ///
    /// Panics if the temporary directory, the project directories or either
    /// fixture file cannot be created.
    fn new_with_llm() -> Self {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let project_dir = temp_dir.path().join("test-llm-project");

        // Create project directory
        fs::create_dir_all(&project_dir).expect("Failed to create project dir");

        // Create .ggen directory structure
        let ggen_dir = project_dir.join(".ggen");
        fs::create_dir_all(&ggen_dir).expect("Failed to create .ggen dir");

        // Create ggen.toml with enable_llm = true
        let ggen_toml = r#"
[project]
name = "test-llm-project"
version = "0.1.0"
description = "Test project for LLM integration"

[ontology]
source = ".ggen/test.ttl"

[inference]
rules = [
    { name = "standard-normalization", construct = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }" }
]

[generation]
enable_llm = true
llm_provider = "groq"
llm_model = "groq::openai/gpt-oss-20b"
[[generation.rules]]
name = "test-rule"
query = { inline = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX a2a: <http://ggen.ai/a2a#>

SELECT ?skill_name ?system_prompt ?implementation_hint ?language
WHERE {
  ?skill a a2a:Skill ;
         rdfs:label ?skill_name ;
         a2a:hasSystemPrompt ?system_prompt ;
         a2a:hasImplementationHint ?implementation_hint .
  BIND("rust" AS ?language)
}
""" }
template = { inline = "{{generated_impl}}" }
output_file = "src/skills/test_skill.rs"
mode = "Overwrite"

[ai]
model = "groq::openai/gpt-oss-20b"
temperature = 0.7
max_tokens = 4096
"#;
        fs::write(project_dir.join("ggen.toml"), ggen_toml).expect("Failed to write ggen.toml");

        // Create a simple ontology with behavior predicates
        let ontology = r#"
@prefix : <http://example.org/test#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix a2a: <http://ggen.ai/a2a#> .
@prefix mcp: <http://ggen.ai/mcp#> .

:test_skill a a2a:Skill ;
    rdfs:label "Test Skill" ;
    rdfs:comment "A test skill for LLM verification" ;
    a2a:hasSystemPrompt "You are a test assistant. Say hello." ;
    a2a:hasImplementationHint "Return a simple greeting" ;
    a2a:hasInputType "string" ;
    a2a:hasOutputType "string" .
"#;
        fs::write(ggen_dir.join("test.ttl"), ontology).expect("Failed to write ontology");

        Self {
            temp_dir,
            project_dir,
        }
    }

    /// Get the path to the ggen binary.
    ///
    /// The path is baked in at compile time from the `CARGO_BIN_EXE_ggen`
    /// environment variable via `env!`, so nothing is built or looked up at
    /// run time.
    fn ggen_binary(&self) -> PathBuf {
        PathBuf::from(env!("CARGO_BIN_EXE_ggen"))
    }

    /// Run `ggen sync --ontology <project>/.ggen/test.ttl` inside the project
    /// directory and return the captured output.
    ///
    /// The child receives this process's `GROQ_API_KEY` (an empty string when
    /// the variable is unset or not valid Unicode) and a verbose `RUST_LOG`
    /// filter, so callers can scan stdout/stderr for trace output.
    ///
    /// # Panics
    ///
    /// Panics if the binary cannot be spawned. A non-zero exit status is NOT
    /// treated as an error here; callers inspect `Output::status` themselves.
    fn run_sync(&self) -> std::process::Output {
        Command::new(self.ggen_binary())
            .arg("sync")
            .arg("--ontology")
            .arg(self.project_dir.join(".ggen/test.ttl"))
            .current_dir(&self.project_dir)
            .env(
                "GROQ_API_KEY",
                std::env::var("GROQ_API_KEY").unwrap_or_default(),
            )
            .env("RUST_LOG", "debug,ggen_ai=trace,ggen_core=trace")
            .output()
            .expect("Failed to run ggen sync")
    }

    /// Read generated skill implementation file
    /// (`<project>/src/skills/test_skill.rs`).
    ///
    /// Returns an empty string when the file cannot be read, so callers can
    /// keep asserting on emptiness. The underlying error is written to stderr
    /// instead of being dropped, which distinguishes "file missing" from
    /// "file present but empty" in the test log.
    fn read_generated_impl(&self) -> String {
        let impl_path = self.project_dir.join("src/skills/test_skill.rs");
        fs::read_to_string(&impl_path).unwrap_or_else(|err| {
            eprintln!(
                "Could not read generated file {}: {err}",
                impl_path.display()
            );
            String::new()
        })
    }
}

/// End-to-end check that `ggen sync` really calls the LLM.
///
/// Steps: require a non-empty `GROQ_API_KEY`, build a project with LLM
/// generation enabled, run `ggen sync`, then assert that
/// - the process exited successfully,
/// - stdout/stderr contain the expected trace markers (`llm.complete`,
///   `llm.model`, a token count and a model identifier), and
/// - the generated file is non-empty, free of stub markers and longer than
///   100 bytes.
#[test]
#[ignore = "requires GROQ_API_KEY and makes real API calls to Groq"]
fn test_llm_integration_e2e_with_real_api() {
    // Verify GROQ_API_KEY is set (an empty value counts as unset)
    let has_api_key = std::env::var("GROQ_API_KEY").is_ok_and(|key| !key.is_empty());
    assert!(
        has_api_key,
        "GROQ_API_KEY must be set for this E2E test. \
         This test makes REAL API calls to Groq (Chicago TDD)."
    );

    // Create test project with LLM enabled
    let project = TestProject::new_with_llm();

    // Run ggen sync (this will make REAL LLM API calls)
    let output = project.run_sync();

    // Decode both streams once; they feed the failure message and the trace checks.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);

    // Check that sync succeeded
    assert!(
        output.status.success(),
        "ggen sync failed:\nstdout:\n{stdout}\n\nstderr:\n{stderr}"
    );

    // Trace markers may land on either stream, so search both together.
    let combined = format!("{stdout}{stderr}");

    // Verify OpenTelemetry span markers for real LLM calls
    // Check for llm.complete span (the main span name)
    assert!(
        combined.contains("llm.complete"),
        "Expected to find 'llm.complete' span in OTEL traces.\n\
         This indicates the LLM API was not called.\n\
         Output:\n{}",
        combined
    );

    // Check for required OTEL attributes: llm.model
    assert!(
        combined.contains("llm.model"),
        "Expected to find 'llm.model' attribute in OTEL traces.\n\
         Output:\n{}",
        combined
    );

    // Check for required OTEL attributes: token counts.
    // "total_tokens" also matches the fully qualified "llm.total_tokens".
    assert!(
        combined.contains("total_tokens"),
        "Expected to find token counts in OTEL traces.\n\
         Output:\n{}",
        combined
    );

    // Check for model name (confirms real API call to Groq)
    let mentions_model = ["gpt-oss-20b", "groq", "llm.model="]
        .iter()
        .any(|marker| combined.contains(*marker));
    assert!(
        mentions_model,
        "Expected to find Groq model identifier in traces.\n\
         Output:\n{}",
        combined
    );

    // Verify generated code has actual implementation, not TODO stubs
    let generated_code = project.read_generated_impl();

    assert!(
        !generated_code.is_empty(),
        "Generated implementation file should not be empty"
    );

    // Check that we got actual LLM-generated code, not TODO stubs
    let has_stub_marker = [
        "TODO: Implement this skill",
        "// TODO",
        "unimplemented!()",
    ]
    .iter()
    .any(|marker| generated_code.contains(*marker));
    assert!(
        !has_stub_marker,
        "Generated code should contain LLM implementation, not TODO stubs.\n\
         Generated code:\n{}",
        generated_code
    );

    // Verify the code has actual content (LLM should have generated something)
    assert!(
        generated_code.len() > 100, // At least some meaningful code
        "Generated code seems too short, likely not using LLM properly.\n\
         Generated code:\n{}",
        generated_code
    );

    println!("✅ E2E LLM integration test PASSED");
    println!("   ✓ Real Groq API calls were made (verified via OTEL traces)");
    println!("   ✓ OTEL spans: llm.complete, llm.model, llm.total_tokens");
    println!("   ✓ Generated code has actual implementation (not TODO stubs)");
    println!("   ✓ Code length: {} bytes", generated_code.len());
}

/// Runs `ggen sync` with an empty `GROQ_API_KEY` and checks that it fails
/// gracefully: a non-zero exit, no panic, and an error on stderr that
/// mentions the API / Groq / llm.
#[test]
#[ignore = "spawns the real ggen binary; run explicitly"]
fn test_llm_integration_without_api_key_fails_gracefully() {
    // Create test project
    let project = TestProject::new_with_llm();

    // Run sync WITHOUT API key. The variable is overridden with an empty
    // value so a key present in the parent environment cannot leak in.
    let output = Command::new(project.ggen_binary())
        .arg("sync")
        .arg("--ontology")
        .arg(project.project_dir.join(".ggen/test.ttl"))
        .current_dir(&project.project_dir)
        .env("GROQ_API_KEY", "") // No API key
        .output()
        .expect("Failed to run ggen sync");

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);

    // "Fails": the command must not report success when no key is available.
    assert!(
        !output.status.success(),
        "Expected ggen sync to fail without an API key, but it succeeded.\n\
         stdout:\n{stdout}\n\nstderr:\n{stderr}"
    );

    // "Gracefully": a Rust panic prints "panicked at" to stderr; that is a
    // crash, not a handled error, even if its message happens to mention the LLM.
    assert!(
        !stderr.contains("panicked at"),
        "Expected a handled error, but ggen sync panicked:\n{stderr}"
    );

    // Should fail gracefully with error message
    assert!(
        stderr.contains("API") || stderr.contains("Groq") || stderr.contains("llm"),
        "Expected error message about missing API key, got:\n{}",
        stderr
    );

    println!("✅ API key missing test PASSED (fails gracefully)");
}

/// Reports whether `GROQ_API_KEY` is available to the test run.
///
/// Purely informational: it prints a status line and never fails, and it
/// makes no API calls. An unset, non-Unicode or empty variable is reported
/// as "not set".
#[test]
fn test_groq_api_key_is_set() {
    // Collapse "unset" and "unreadable" into the empty string so a single
    // emptiness check covers every not-usable case.
    let key = std::env::var("GROQ_API_KEY").unwrap_or_default();

    if key.is_empty() {
        println!("⚠️  GROQ_API_KEY is not set");
        println!("   E2E tests with real API calls will be skipped");
    } else {
        println!("✅ GROQ_API_KEY is set ({} chars)", key.len());
        println!("   E2E tests with real API calls can run");
    }
}