panini_lang_engine/
prompts.rs1use isolang::Language as IsoLang;
2use panini_core::traits::LinguisticDefinition;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
/// Errors produced while loading prompt configuration or assembling a prompt.
#[derive(Debug, thiserror::Error)]
pub enum PromptBuilderError {
    /// Wraps a `serde_json::Error`; the `#[from]` attribute lets `?` convert
    /// such errors automatically. Not constructed explicitly in the visible
    /// portion of this file.
    #[error("Failed to parse JSON schema: {0}")]
    SchemaParseError(#[from] serde_json::Error),
    /// The prompt configuration file could not be read or deserialized. The
    /// payload is a human-readable description built by
    /// `ExtractorPrompts::load`.
    #[error("Failed to load prompt config: {0}")]
    ConfigLoadError(String),
    /// A `{placeholder}` found in a template has no matching key in the
    /// interpolation context. Returned by `interpolate`.
    #[error("Placeholder '{placeholder}' in template is not available in context")]
    PlaceholderNotAvailable { placeholder: String },
}
18
/// Prompt texts and templates consumed by `build_extraction_prompt`.
///
/// Fields described as templates may contain `{name}` placeholders that are
/// resolved with `interpolate`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ExtractorPrompts {
    /// Emitted verbatim (no interpolation, no tag) as the first prompt block.
    pub system_role: String,
    /// Template rendered into the `<target_language>` block.
    pub target_language: String,
    /// Template rendered into the `<extraction_directives>` block.
    pub extraction_directives: String,
    /// Templates that make up the `<learner_profile>` block.
    pub learner_profile: LearnerProfile,
    /// Templates that make up the `<skill_context>` block.
    pub skill_context: SkillContextPrompts,
    /// Template rendered into the `<user_context>` block; that block is only
    /// emitted when the request carries a non-empty user prompt.
    pub user_context: String,
    /// Emitted verbatim (no interpolation) inside the final `<output>` block.
    pub output_instruction: String,
}
31
/// Templates used to describe the learner inside the `<learner_profile>` block.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct LearnerProfile {
    /// Template for the UI-language line. It is rendered with the `language`
    /// placeholder bound to the learner's UI language name rather than to the
    /// target language.
    pub ui_language: String,
    /// Text inserted verbatim before the background entries; only used when
    /// the request lists at least one background language.
    pub linguistic_background_intro: String,
    /// Template rendered once per background language, with the `iso` and
    /// `level` placeholders bound to that entry.
    pub linguistic_background_entry: String,
}
38
/// Templates used to build the `<skill_context>` block.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkillContextPrompts {
    /// Template that is always rendered; the `path` placeholder holds the
    /// request's skill path, or an empty string when none was given.
    pub skill_tree_path: String,
    /// Template appended on its own line only when the request has a
    /// pedagogical context; the `instructions` placeholder holds that text.
    pub pedagogical_focus: String,
}
44
45impl ExtractorPrompts {
46 pub fn load(path: &str) -> Result<Self, PromptBuilderError> {
51 let content = std::fs::read_to_string(path).map_err(|e| {
52 PromptBuilderError::ConfigLoadError(format!("Failed to read {path}: {e}"))
53 })?;
54 serde_yml::from_str(&content).map_err(|e| {
55 PromptBuilderError::ConfigLoadError(format!("Failed to parse {path}: {e}"))
56 })
57 }
58}
59
60pub use panini_core::component::LanguageLevel;
64
/// Per-call input for `build_extraction_prompt`, constructed through the
/// builder generated by `bon::Builder`.
#[derive(bon::Builder)]
pub struct ExtractionRequest {
    /// NOTE(review): presumably the text to run the extraction on; it is not
    /// read by the prompt builder in the visible portion of this file.
    pub content: String,
    /// NOTE(review): presumably the items to extract; not read by the prompt
    /// builder in the visible portion of this file.
    pub targets: Vec<String>,
    /// Optional pedagogical instructions, exposed to templates as
    /// `{instructions}`. When present, the pedagogical-focus line is added to
    /// the `<skill_context>` block.
    pub pedagogical_context: Option<String>,
    /// Optional skill path, exposed to templates as `{path}` (empty string
    /// when absent).
    pub skill_path: Option<String>,
    /// Name of the learner's UI language, exposed as `{name}`. The builder
    /// defaults it to `"English"`. Its ISO 639-3 code is looked up by name and
    /// falls back to `"eng"` when the name is not recognised.
    #[builder(default = "English".to_string())]
    pub learner_ui_language: String,
    /// Other languages the learner knows; each one produces an entry in the
    /// `<learner_profile>` block. Optional in the builder.
    #[builder(default)]
    pub linguistic_background: Vec<LanguageLevel>,
    /// Optional free-form context, exposed as `{context_description}`. The
    /// `<user_context>` block is emitted only when this is non-empty.
    pub user_prompt: Option<String>,
}
85
/// Wraps `content` in an XML-like element named `tag`.
///
/// The opening tag, the content, and the closing tag are each placed on their
/// own line: `<tag>\ncontent\n</tag>`. Neither argument is escaped or
/// validated.
#[must_use]
pub fn wrap_tag(tag: &str, content: &str) -> String {
    let opening = ["<", tag, ">"].concat();
    let closing = ["</", tag, ">"].concat();
    [opening.as_str(), content, closing.as_str()].join("\n")
}
93
94pub fn interpolate<V: AsRef<str>, S: std::hash::BuildHasher>(
102 template: &str,
103 context: &HashMap<&str, V, S>,
104) -> Result<String, PromptBuilderError> {
105 let placeholder_re = Regex::new(r"\{(\w+)\}").unwrap();
106 let mut result = template.to_string();
107
108 for cap in placeholder_re.captures_iter(template) {
109 let placeholder = &cap[1];
110 let value = context
111 .get(placeholder)
112 .ok_or_else(|| PromptBuilderError::PlaceholderNotAvailable {
113 placeholder: placeholder.to_string(),
114 })?
115 .as_ref();
116 result = result.replace(&format!("{{{placeholder}}}"), value);
117 }
118
119 Ok(result)
120}
121
122pub fn build_extraction_prompt<L: LinguisticDefinition>(
129 language: &L,
130 request: &ExtractionRequest,
131 extractor_prompts: &ExtractorPrompts,
132) -> Result<String, PromptBuilderError> {
133 let cfg = extractor_prompts;
134
135 let ui_lang_name = &request.learner_ui_language;
136 let ui_lang_iso_code = IsoLang::from_name(ui_lang_name)
137 .map_or_else(|| "eng".to_string(), |lang| lang.to_639_3().to_string());
138
139 let context_description = request.user_prompt.as_deref().unwrap_or("");
140 let skill_path = request.skill_path.as_deref().unwrap_or("");
141 let instructions = request.pedagogical_context.as_deref().unwrap_or("");
142
143 let mut global_ctx = HashMap::new();
144 global_ctx.insert("language", language.name().to_string());
145 global_ctx.insert("directives", language.extraction_directives().to_string());
146 global_ctx.insert("path", skill_path.to_string());
147 global_ctx.insert("instructions", instructions.to_string());
148 global_ctx.insert("iso", ui_lang_iso_code);
149 global_ctx.insert("name", ui_lang_name.clone());
150 global_ctx.insert("context_description", context_description.to_string());
151
152 let mut blocks = Vec::new();
153
154 blocks.push(cfg.system_role.clone());
156
157 let language_context = interpolate(&cfg.target_language, &global_ctx)?;
159 blocks.push(wrap_tag("target_language", &language_context));
160
161 let extraction_directives = interpolate(&cfg.extraction_directives, &global_ctx)?;
163 blocks.push(wrap_tag("extraction_directives", &extraction_directives));
164
165 let mut learner_profile_content = String::new();
167
168 let mut ui_lang_ctx = global_ctx.clone();
169 ui_lang_ctx.insert("language", ui_lang_name.clone());
170 let ui_lang_str = interpolate(&cfg.learner_profile.ui_language, &ui_lang_ctx)?;
171 learner_profile_content.push_str(&ui_lang_str);
172
173 if !request.linguistic_background.is_empty() {
174 learner_profile_content.push_str("\n\n");
175 learner_profile_content.push_str(&cfg.learner_profile.linguistic_background_intro);
176 learner_profile_content.push('\n');
177
178 for lang in &request.linguistic_background {
179 let mut ctx = global_ctx.clone();
180 ctx.insert("iso", lang.iso_639_3.clone());
181 ctx.insert("level", lang.level.clone());
182 let entry = interpolate(&cfg.learner_profile.linguistic_background_entry, &ctx)?;
183 learner_profile_content.push_str(&entry);
184 learner_profile_content.push('\n');
185 }
186 }
187
188 blocks.push(wrap_tag("learner_profile", &learner_profile_content));
189
190 let mut skill_context_content = String::new();
192 let skill_path_str = interpolate(&cfg.skill_context.skill_tree_path, &global_ctx)?;
193 skill_context_content.push_str(&skill_path_str);
194
195 if request.pedagogical_context.is_some() {
196 skill_context_content.push('\n');
197 let ped_focus_str = interpolate(&cfg.skill_context.pedagogical_focus, &global_ctx)?;
198 skill_context_content.push_str(&ped_focus_str);
199 }
200
201 blocks.push(wrap_tag("skill_context", &skill_context_content));
202
203 if !context_description.is_empty() {
205 let user_context_str = interpolate(&cfg.user_context, &global_ctx)?;
206 blocks.push(wrap_tag("user_context", &user_context_str));
207 }
208
209 if let Some(morph_directives) = language.extra_extraction_directives() {
211 blocks.push(wrap_tag("morpheme_segmentation", &morph_directives));
212 }
213
214 blocks.push(wrap_tag("output", &cfg.output_instruction));
216
217 Ok(blocks.join("\n\n"))
218}