cordance-llm 0.1.1

//! Bounded prompt builder. Prompts produced here tell the model exactly which
//! source IDs it may cite and forbid it from inventing facts or hard rules.

use cordance_core::source::SourceRecord;

/// Assemble the bounded "pack summary" prompt for the
/// `cordance-llm-candidate.v1` schema.
///
/// The prompt is a fixed template with three interpolated parts:
///
/// * `task` — inserted verbatim under the `## Task` heading; expected to be a
///   single sentence such as "summarise the project's main purpose".
/// * `doctrine_snippets` — `(topic, excerpt)` pairs rendered under a
///   `## Doctrine excerpts` heading. When the slice is empty the heading is
///   left out entirely.
/// * `sources` — the `id` of every record, one bullet per line, in slice
///   order, under `## Available source IDs`. These are the only IDs the
///   template tells the model it may cite.
///
/// No escaping or sanitising is applied to any of the inputs; they are placed
/// into the template exactly as given.
///
/// The result is meant to be handed to
/// [`crate::ollama::OllamaAdapter::generate`] as-is.
#[must_use]
pub fn bounded_pack_summary_prompt(
    sources: &[SourceRecord],
    doctrine_snippets: &[(&str, &str)],
    task: &str,
) -> String {
    // One "  - <id>" bullet per source, newline-separated, no trailing newline.
    let mut id_bullets = String::new();
    for (position, record) in sources.iter().enumerate() {
        if position > 0 {
            id_bullets.push('\n');
        }
        id_bullets += &format!("  - {}", record.id);
    }

    // Optional doctrine section. It stays empty when there is nothing to show
    // so that the heading never appears without content.
    let mut doctrine_section = String::new();
    if !doctrine_snippets.is_empty() {
        doctrine_section.push_str("\n## Doctrine excerpts\n");
        for (position, (topic, excerpt)) in doctrine_snippets.iter().enumerate() {
            // Entries are separated by a blank line.
            if position > 0 {
                doctrine_section.push_str("\n\n");
            }
            doctrine_section += &format!("  [{topic}]\n  {excerpt}");
        }
        doctrine_section.push('\n');
    }

    // `{{` / `}}` are literal braces in the JSON example; `{task}` is captured
    // from the parameter, the other two placeholders are bound explicitly.
    format!(
        r#"You are a documentation assistant for the Cordance project.

## Task
{task}

## Output format
Return a single JSON object that exactly matches the `cordance-llm-candidate.v1` schema:

```json
{{
  "schema": "cordance-llm-candidate.v1",
  "candidate_id": "<uuid-v4>",
  "input_source_ids": [ /* copy the exact IDs from the list below */ ],
  "claims": [
    {{
      "text": "<claim prose>",
      "claim_type": "<one of: workflow_instruction | strong_preference | weak_preference | candidate_observation | rejected_approach | open_uncertainty | generated_summary>",
      "source_ids": [ "<id from the list below>" ],
      "confidence": "candidate"
    }}
  ]
}}
```

## Rules you MUST follow
- Do not invent facts.
- Do not create `hard_rule` or `project_invariant` claims. Those types are forbidden from LLM output.
- Every claim MUST cite at least one source_id from the list below.
- Do not cite a source_id that is not in the list below.
- The `input_source_ids` field must contain every source_id you cite across all claims.
- Return only the JSON object — no markdown fences, no commentary.
{snippet_block}
## Available source IDs
{source_id_list}
"#,
        snippet_block = doctrine_section,
        source_id_list = id_bullets,
    )
}

#[cfg(test)]
mod tests {
    use super::*;
    use cordance_core::source::SourceClass;

    /// Build an unblocked ADR-class record with a dummy all-zero digest.
    /// Only `id` is read by the prompt builder; the other fields are filler.
    fn adr_record(id: &str, path: &str) -> SourceRecord {
        let placeholder_digest = "0".repeat(64);
        SourceRecord {
            id: id.into(),
            path: path.into(),
            class: SourceClass::ProjectAdr,
            sha256: placeholder_digest,
            size_bytes: 0,
            modified: None,
            blocked: false,
            blocked_reason: None,
        }
    }

    /// Assert that every string in `needles` occurs somewhere in `prompt`.
    fn assert_mentions_all(prompt: &str, needles: &[&str]) {
        for needle in needles.iter().copied() {
            assert!(prompt.contains(needle), "prompt is missing {needle:?}");
        }
    }

    #[test]
    fn prompt_contains_source_ids() {
        let records = [
            adr_record("project_adr:docs/adr/0001.md", "docs/adr/0001.md"),
            adr_record("project_adr:docs/adr/0002.md", "docs/adr/0002.md"),
        ];
        let rendered = bounded_pack_summary_prompt(&records, &[], "summarise the project");
        assert_mentions_all(
            &rendered,
            &[
                "project_adr:docs/adr/0001.md",
                "project_adr:docs/adr/0002.md",
            ],
        );
    }

    #[test]
    fn prompt_contains_forbidden_rule_instruction() {
        let rendered = bounded_pack_summary_prompt(&[], &[], "describe the build system");
        assert_mentions_all(
            &rendered,
            &["hard_rule", "project_invariant", "Do not invent facts"],
        );
    }

    #[test]
    fn prompt_contains_schema_name() {
        let rendered = bounded_pack_summary_prompt(&[], &[], "anything");
        assert_mentions_all(&rendered, &["cordance-llm-candidate.v1"]);
    }

    #[test]
    fn prompt_includes_doctrine_snippets() {
        let contracts_excerpt = "Contracts-first means the schema is the source of truth.";
        let testing_excerpt = "All tests must be deterministic and repeatable.";
        let doctrine = [
            ("contracts", contracts_excerpt),
            ("testing", testing_excerpt),
        ];
        let rendered = bounded_pack_summary_prompt(&[], &doctrine, "describe testing");
        assert_mentions_all(&rendered, &[contracts_excerpt, testing_excerpt]);
    }

    #[test]
    fn prompt_contains_task() {
        let task_sentence = "summarise the project main purpose";
        let rendered = bounded_pack_summary_prompt(&[], &[], task_sentence);
        assert_mentions_all(&rendered, &[task_sentence]);
    }

    #[test]
    fn no_snippets_omits_doctrine_block() {
        let rendered = bounded_pack_summary_prompt(&[], &[], "any task");
        let has_doctrine_heading = rendered.contains("## Doctrine excerpts");
        assert!(!has_doctrine_heading);
    }
}