// panini_lang_engine/prompts.rs

1use isolang::Language as IsoLang;
2use panini_core::traits::LinguisticDefinition;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
7// ----- Prompt Builder Errors -----
8
9#[derive(Debug, thiserror::Error)]
10pub enum PromptBuilderError {
11    #[error("Failed to parse JSON schema: {0}")]
12    SchemaParseError(#[from] serde_json::Error),
13    #[error("Failed to load prompt config: {0}")]
14    ConfigLoadError(String),
15    #[error("Placeholder '{placeholder}' in template is not available in context")]
16    PlaceholderNotAvailable { placeholder: String },
17}
18
19// ----- Prompt Config Structs -----
20
21#[derive(Debug, Clone, Deserialize, Serialize)]
22pub struct ExtractorPrompts {
23    pub system_role: String,
24    pub target_language: String,
25    pub extraction_directives: String,
26    pub learner_profile: LearnerProfile,
27    pub skill_context: SkillContextPrompts,
28    pub user_context: String,
29    pub output_instruction: String,
30}
31
32#[derive(Debug, Clone, Deserialize, Serialize)]
33pub struct LearnerProfile {
34    pub ui_language: String,
35    pub linguistic_background_intro: String,
36    pub linguistic_background_entry: String,
37}
38
39#[derive(Debug, Clone, Deserialize, Serialize)]
40pub struct SkillContextPrompts {
41    pub skill_tree_path: String,
42    pub pedagogical_focus: String,
43}
44
45impl ExtractorPrompts {
46    /// Load prompts from a YAML file.
47    ///
48    /// # Errors
49    /// Returns an error if the file cannot be read or parsed.
50    pub fn load(path: &str) -> Result<Self, PromptBuilderError> {
51        let content = std::fs::read_to_string(path).map_err(|e| {
52            PromptBuilderError::ConfigLoadError(format!("Failed to read {path}: {e}"))
53        })?;
54        serde_yml::from_str(&content).map_err(|e| {
55            PromptBuilderError::ConfigLoadError(format!("Failed to parse {path}: {e}"))
56        })
57    }
58}
59
60// ----- Extraction Request -----
61
62/// Re-export from panini-core for backwards compatibility.
63pub use panini_core::component::LanguageLevel;
64
65/// Generic extraction request
66#[derive(bon::Builder)]
67pub struct ExtractionRequest {
68    /// The text/card JSON to extract features from.
69    pub content: String,
70    /// Target words to focus extraction on.
71    pub targets: Vec<String>,
72    /// Optional pedagogical context (replaces skill node instructions).
73    pub pedagogical_context: Option<String>,
74    /// Optional skill/topic path for context.
75    pub skill_path: Option<String>,
76    /// Learner's UI language (for pedagogical explanation).
77    #[builder(default = "English".to_string())]
78    pub learner_ui_language: String,
79    /// Learner's linguistic background.
80    #[builder(default)]
81    pub linguistic_background: Vec<LanguageLevel>,
82    /// Optional user-provided context.
83    pub user_prompt: Option<String>,
84}
85
86// ----- Helper Functions -----
87
/// Encloses `content` between `<tag>` and `</tag>`, each tag on its own line.
///
/// Neither `tag` nor `content` is escaped or validated.
#[must_use]
pub fn wrap_tag(tag: &str, content: &str) -> String {
    // Fixed markup: "<", ">", "\n", "\n", "</", ">" = 7 bytes, plus the tag twice.
    let mut wrapped = String::with_capacity(2 * tag.len() + content.len() + 7);

    wrapped.push('<');
    wrapped.push_str(tag);
    wrapped.push_str(">\n");
    wrapped.push_str(content);
    wrapped.push_str("\n</");
    wrapped.push_str(tag);
    wrapped.push('>');

    wrapped
}
93
94/// Interpolates placeholders in a template string
95///
96/// # Panics
97/// Panics if the internal regex fails to compile.
98///
99/// # Errors
100/// Returns an error if a placeholder requires a value not present in the context.
101pub fn interpolate<V: AsRef<str>, S: std::hash::BuildHasher>(
102    template: &str,
103    context: &HashMap<&str, V, S>,
104) -> Result<String, PromptBuilderError> {
105    let placeholder_re = Regex::new(r"\{(\w+)\}").unwrap();
106    let mut result = template.to_string();
107
108    for cap in placeholder_re.captures_iter(template) {
109        let placeholder = &cap[1];
110        let value = context
111            .get(placeholder)
112            .ok_or_else(|| PromptBuilderError::PlaceholderNotAvailable {
113                placeholder: placeholder.to_string(),
114            })?
115            .as_ref();
116        result = result.replace(&format!("{{{placeholder}}}"), value);
117    }
118
119    Ok(result)
120}
121
122// ----- Feature Extractor Prompt Context -----
123
124/// Builds the system prompt for the feature extractor.
125///
126/// # Errors
127/// Returns an error if prompt interpolation fails (e.g. missing context variables).
128pub fn build_extraction_prompt<L: LinguisticDefinition>(
129    language: &L,
130    request: &ExtractionRequest,
131    extractor_prompts: &ExtractorPrompts,
132) -> Result<String, PromptBuilderError> {
133    let cfg = extractor_prompts;
134
135    let ui_lang_name = &request.learner_ui_language;
136    let ui_lang_iso_code = IsoLang::from_name(ui_lang_name)
137        .map_or_else(|| "eng".to_string(), |lang| lang.to_639_3().to_string());
138
139    let context_description = request.user_prompt.as_deref().unwrap_or("");
140    let skill_path = request.skill_path.as_deref().unwrap_or("");
141    let instructions = request.pedagogical_context.as_deref().unwrap_or("");
142
143    let mut global_ctx = HashMap::new();
144    global_ctx.insert("language", language.name().to_string());
145    global_ctx.insert("directives", language.extraction_directives().to_string());
146    global_ctx.insert("path", skill_path.to_string());
147    global_ctx.insert("instructions", instructions.to_string());
148    global_ctx.insert("iso", ui_lang_iso_code);
149    global_ctx.insert("name", ui_lang_name.clone());
150    global_ctx.insert("context_description", context_description.to_string());
151
152    let mut blocks = Vec::new();
153
154    // System role
155    blocks.push(cfg.system_role.clone());
156
157    // Target language section
158    let language_context = interpolate(&cfg.target_language, &global_ctx)?;
159    blocks.push(wrap_tag("target_language", &language_context));
160
161    // Extraction directives section
162    let extraction_directives = interpolate(&cfg.extraction_directives, &global_ctx)?;
163    blocks.push(wrap_tag("extraction_directives", &extraction_directives));
164
165    // Learner profile section
166    let mut learner_profile_content = String::new();
167
168    let mut ui_lang_ctx = global_ctx.clone();
169    ui_lang_ctx.insert("language", ui_lang_name.clone());
170    let ui_lang_str = interpolate(&cfg.learner_profile.ui_language, &ui_lang_ctx)?;
171    learner_profile_content.push_str(&ui_lang_str);
172
173    if !request.linguistic_background.is_empty() {
174        learner_profile_content.push_str("\n\n");
175        learner_profile_content.push_str(&cfg.learner_profile.linguistic_background_intro);
176        learner_profile_content.push('\n');
177
178        for lang in &request.linguistic_background {
179            let mut ctx = global_ctx.clone();
180            ctx.insert("iso", lang.iso_639_3.clone());
181            ctx.insert("level", lang.level.clone());
182            let entry = interpolate(&cfg.learner_profile.linguistic_background_entry, &ctx)?;
183            learner_profile_content.push_str(&entry);
184            learner_profile_content.push('\n');
185        }
186    }
187
188    blocks.push(wrap_tag("learner_profile", &learner_profile_content));
189
190    // Skill context section
191    let mut skill_context_content = String::new();
192    let skill_path_str = interpolate(&cfg.skill_context.skill_tree_path, &global_ctx)?;
193    skill_context_content.push_str(&skill_path_str);
194
195    if request.pedagogical_context.is_some() {
196        skill_context_content.push('\n');
197        let ped_focus_str = interpolate(&cfg.skill_context.pedagogical_focus, &global_ctx)?;
198        skill_context_content.push_str(&ped_focus_str);
199    }
200
201    blocks.push(wrap_tag("skill_context", &skill_context_content));
202
203    // User context section (if provided)
204    if !context_description.is_empty() {
205        let user_context_str = interpolate(&cfg.user_context, &global_ctx)?;
206        blocks.push(wrap_tag("user_context", &user_context_str));
207    }
208
209    // Morpheme segmentation directives (agglutinative languages only)
210    if let Some(morph_directives) = language.extra_extraction_directives() {
211        blocks.push(wrap_tag("morpheme_segmentation", &morph_directives));
212    }
213
214    // Output instruction section
215    blocks.push(wrap_tag("output", &cfg.output_instruction));
216
217    Ok(blocks.join("\n\n"))
218}