use panini_core::traits::LinguisticDefinition;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use regex::Regex;
use isolang::Language as IsoLang;
/// Errors reported while loading prompt configuration or assembling a prompt.
#[derive(Debug, thiserror::Error)]
pub enum PromptBuilderError {
/// A JSON schema could not be parsed. Wraps the underlying `serde_json::Error`,
/// which converts into this variant automatically through `#[from]`.
#[error("Failed to parse JSON schema: {0}")]
SchemaParseError(#[from] serde_json::Error),
/// The prompt configuration file could not be read or deserialized.
/// The payload is a human-readable description of the failure
/// (see `ExtractorPrompts::load`).
#[error("Failed to load prompt config: {0}")]
ConfigLoadError(String),
/// A template referenced a `{placeholder}` for which the interpolation
/// context holds no value (see `interpolate`).
#[error("Placeholder '{placeholder}' in template is not available in context")]
PlaceholderNotAvailable { placeholder: String },
}
/// Text fragments and templates from which `build_extraction_prompt` assembles
/// the extraction prompt.
///
/// Fields described as templates are passed through `interpolate`, so they may
/// contain `{placeholder}` markers; the remaining fields are used verbatim.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ExtractorPrompts {
/// Opening section of the prompt; emitted verbatim, without a surrounding tag.
pub system_role: String,
/// Template for the `<target_language>` section.
pub target_language: String,
/// Template for the `<extraction_directives>` section.
pub extraction_directives: String,
/// Templates used to build the `<learner_profile>` section.
pub learner_profile: LearnerProfile,
/// Templates used to build the `<skill_context>` section.
pub skill_context: SkillContextPrompts,
/// Template for the `<user_context>` section; only rendered when the request
/// carries a non-empty user prompt.
pub user_context: String,
/// Content of the closing `<output>` section; emitted verbatim inside the tag.
pub output_instruction: String,
}
/// Templates for the `<learner_profile>` section of the extraction prompt.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct LearnerProfile {
/// Template for the line describing the learner's UI language. When it is
/// rendered by `build_extraction_prompt`, `{language}` resolves to the
/// learner's UI language name rather than the target language.
pub ui_language: String,
/// Text inserted verbatim (not interpolated) before the list of background
/// languages; only used when the request lists at least one.
pub linguistic_background_intro: String,
/// Template rendered once per background language, with `{iso}` and `{level}`
/// taken from that entry.
pub linguistic_background_entry: String,
}
/// Templates for the `<skill_context>` section of the extraction prompt.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkillContextPrompts {
/// Template for the skill tree path line; always rendered by
/// `build_extraction_prompt`, with `{path}` empty when the request has no skill path.
pub skill_tree_path: String,
/// Template for the pedagogical focus line; only rendered when the request
/// provides a pedagogical context.
pub pedagogical_focus: String,
}
impl ExtractorPrompts {
    /// Reads the file at `path` and deserializes its YAML content into an
    /// [`ExtractorPrompts`].
    ///
    /// # Errors
    ///
    /// Returns [`PromptBuilderError::ConfigLoadError`] when the file cannot be
    /// read or when its content does not deserialize; the message names `path`
    /// and includes the underlying error.
    pub fn load(path: &str) -> Result<Self, PromptBuilderError> {
        let yaml = std::fs::read_to_string(path).map_err(|io_err| {
            PromptBuilderError::ConfigLoadError(format!("Failed to read {path}: {io_err}"))
        })?;

        let prompts: Self = serde_yml::from_str(&yaml).map_err(|yaml_err| {
            PromptBuilderError::ConfigLoadError(format!("Failed to parse {path}: {yaml_err}"))
        })?;

        Ok(prompts)
    }
}
pub use panini_core::component::LanguageLevel;
/// Input describing a single extraction, constructed through the builder
/// derived by `bon::Builder`.
#[derive(bon::Builder)]
pub struct ExtractionRequest {
/// NOTE(review): presumably the text to analyse; it is not read by
/// `build_extraction_prompt` in this file — confirm against callers.
pub content: String,
/// NOTE(review): presumably the items to extract from `content`; not read by
/// `build_extraction_prompt` in this file — confirm against callers.
pub targets: Vec<String>,
/// Optional pedagogical instructions. Exposed to templates as `{instructions}`
/// (empty when absent); its presence enables the pedagogical focus line.
pub pedagogical_context: Option<String>,
/// Optional skill tree path. Exposed to templates as `{path}` (empty when absent).
pub skill_path: Option<String>,
/// Name of the learner's UI language; the builder defaults it to `"English"`.
/// It is looked up with `IsoLang::from_name`, and `"eng"` is used as the ISO
/// code when the lookup fails.
#[builder(default = "English".to_string())]
pub learner_ui_language: String,
/// Languages the learner already knows, each with a level. May be left unset
/// in the builder; when empty, the background part of the learner profile is omitted.
#[builder(default)]
pub linguistic_background: Vec<LanguageLevel>,
/// Optional free-form context. Exposed to templates as `{context_description}`;
/// the `<user_context>` section is emitted only when this is non-empty.
pub user_prompt: Option<String>,
}
/// Wraps `content` in an XML-like element named `tag`.
///
/// The opening tag, the content, and the closing tag each sit on their own
/// line: `<tag>\n{content}\n</tag>`. Neither argument is escaped or validated.
#[must_use]
pub fn wrap_tag(tag: &str, content: &str) -> String {
    ["<", tag, ">\n", content, "\n</", tag, ">"].concat()
}
pub fn interpolate<V: AsRef<str>, S: std::hash::BuildHasher>(template: &str, context: &HashMap<&str, V, S>) -> Result<String, PromptBuilderError> {
let placeholder_re = Regex::new(r"\{(\w+)\}").unwrap();
let mut result = template.to_string();
for cap in placeholder_re.captures_iter(template) {
let placeholder = &cap[1];
let value = context.get(placeholder)
.ok_or_else(|| PromptBuilderError::PlaceholderNotAvailable {
placeholder: placeholder.to_string(),
})?
.as_ref();
result = result.replace(&format!("{{{placeholder}}}"), value);
}
Ok(result)
}
/// Assembles the complete extraction prompt for `language` from `request` and
/// the configured `extractor_prompts`.
///
/// The prompt consists of the following sections, joined by blank lines:
///
/// 1. the system role, verbatim and untagged;
/// 2. `<target_language>`;
/// 3. `<extraction_directives>`;
/// 4. `<learner_profile>`: the UI language line, followed by the background
///    intro and one entry per background language when the request lists any;
/// 5. `<skill_context>`: the skill tree path line, followed by the pedagogical
///    focus line when the request has a pedagogical context;
/// 6. `<user_context>`, only when the request has a non-empty user prompt;
/// 7. `<morpheme_segmentation>`, only when `language` supplies extra
///    extraction directives;
/// 8. `<output>`, containing the output instruction verbatim.
///
/// Templates may reference `{language}`, `{directives}`, `{path}`,
/// `{instructions}`, `{iso}`, `{name}` and `{context_description}`; the
/// background entry template additionally has `{level}`.
///
/// # Errors
///
/// Propagates the error from `interpolate` when a template references a
/// placeholder that is not available.
pub fn build_extraction_prompt<L: LinguisticDefinition>(
    language: &L,
    request: &ExtractionRequest,
    extractor_prompts: &ExtractorPrompts,
) -> Result<String, PromptBuilderError> {
    let learner_cfg = &extractor_prompts.learner_profile;
    let skill_cfg = &extractor_prompts.skill_context;

    let ui_name = &request.learner_ui_language;
    // Unknown UI language names fall back to English.
    let ui_iso = match IsoLang::from_name(ui_name) {
        Some(lang) => lang.to_639_3().to_string(),
        None => "eng".to_string(),
    };
    let user_note = request.user_prompt.as_deref().unwrap_or("");

    // Placeholders shared by every template; absent optional values render as "".
    let base_ctx: HashMap<&str, String> = HashMap::from([
        ("language", language.name().to_string()),
        ("directives", language.extraction_directives().to_string()),
        ("path", request.skill_path.as_deref().unwrap_or("").to_string()),
        (
            "instructions",
            request.pedagogical_context.as_deref().unwrap_or("").to_string(),
        ),
        ("iso", ui_iso),
        ("name", ui_name.clone()),
        ("context_description", user_note.to_string()),
    ]);

    let mut sections = vec![extractor_prompts.system_role.clone()];

    for (tag, template) in [
        ("target_language", &extractor_prompts.target_language),
        ("extraction_directives", &extractor_prompts.extraction_directives),
    ] {
        let rendered = interpolate(template, &base_ctx)?;
        sections.push(wrap_tag(tag, &rendered));
    }

    // Learner profile: within this line `{language}` means the UI language.
    let mut ui_ctx = base_ctx.clone();
    ui_ctx.insert("language", ui_name.clone());
    let mut profile = interpolate(&learner_cfg.ui_language, &ui_ctx)?;
    if !request.linguistic_background.is_empty() {
        profile.push_str("\n\n");
        profile.push_str(&learner_cfg.linguistic_background_intro);
        profile.push('\n');

        // Only `iso` and `level` vary between entries, and both are overwritten
        // on every iteration, so a single copy of the base context is enough.
        let mut entry_ctx = base_ctx.clone();
        for known in &request.linguistic_background {
            entry_ctx.insert("iso", known.iso_639_3.clone());
            entry_ctx.insert("level", known.level.clone());
            let line = interpolate(&learner_cfg.linguistic_background_entry, &entry_ctx)?;
            profile.push_str(&line);
            profile.push('\n');
        }
    }
    sections.push(wrap_tag("learner_profile", &profile));

    // Skill context: the path line is always present, the focus line is optional.
    let mut skill_section = interpolate(&skill_cfg.skill_tree_path, &base_ctx)?;
    if request.pedagogical_context.is_some() {
        let focus = interpolate(&skill_cfg.pedagogical_focus, &base_ctx)?;
        skill_section.push('\n');
        skill_section.push_str(&focus);
    }
    sections.push(wrap_tag("skill_context", &skill_section));

    if !user_note.is_empty() {
        let user_section = interpolate(&extractor_prompts.user_context, &base_ctx)?;
        sections.push(wrap_tag("user_context", &user_section));
    }

    if let Some(extra_directives) = language.extra_extraction_directives() {
        sections.push(wrap_tag("morpheme_segmentation", &extra_directives));
    }

    sections.push(wrap_tag("output", &extractor_prompts.output_instruction));

    Ok(sections.join("\n\n"))
}