use crate::concepts::{extract_concept_query, lookup_concept_query};
use crate::engine::{normalize_prompt, SymbolicAnswer};
use crate::event_log::EventLog;
use crate::language::{detect as detect_language, Language};
use crate::seed;
use crate::solver_handlers::{finalize_simple, try_concept_lookup};
use crate::solver_helpers::{last_assistant_turn, last_user_turn};
use crate::web_search_core::{WEB_SEARCH_PROVIDERS, WEB_SEARCH_RRF_K};
/// Parsed form of a "how does it work" style prompt.
struct HowItWorksQuery {
/// Subject named inline in the prompt; `None` when the prompt is a bare
/// mechanism question that carries no subject of its own.
subject: Option<String>,
}
/// A procedural "how to ..." request broken into its parts.
struct ProceduralHowToTask {
/// Cleaned task phrase after common-typo correction.
task: String,
/// Action of the task: the lexicon-supplied action when the matched form
/// carries one, otherwise the leading word / matched verb surface.
action: String,
/// What the action applies to. May be empty; it is the whole task phrase
/// when the action came from a lexicon override.
object: String,
/// Spelling corrections that were applied while building `task`.
corrections: Vec<SpellingCorrection>,
}
/// One token-level spelling fix applied to a procedural task phrase.
#[derive(Debug, Clone)]
struct SpellingCorrection {
/// The misspelled token as it appeared in the task.
from: String,
/// The replacement token that was substituted for it.
to: String,
}
pub fn try_how_to_procedure(
prompt: &str,
normalized: &str,
log: &mut EventLog,
) -> Option<SymbolicAnswer> {
let task = extract_procedural_how_to_task(normalized)?;
for correction in &task.corrections {
log.append(
"spelling_correction",
format!("{}->{}", correction.from, correction.to),
);
}
let is_install_procedure = task.action == "install";
let fallback_query = format!("how to {}", task.task);
let search_query = procedural_search_query(&task);
let wikihow_candidate = wikihow_page_title(&task.task);
let wikihow_api_url = format!(
"https://www.wikihow.com/api.php?action=parse&page={wikihow_candidate}\
&prop=text%7Csections%7Cdisplaytitle&format=json&origin=*"
);
log.append("procedural_how_to:request", task.task.clone());
log.append("procedural_how_to:action", task.action.clone());
if !task.object.is_empty() {
log.append("procedural_how_to:object", task.object.clone());
}
if is_install_procedure {
log.append(
"procedural_how_to:stage",
"official_documentation".to_owned(),
);
log.append(
"procedural_how_to:source_gate",
"official_documentation_first".to_owned(),
);
}
log.append("procedural_how_to:stage", "wikipedia".to_owned());
log.append("procedural_how_to:stage", "wikidata".to_owned());
log.append("procedural_how_to:stage", "wikihow_api".to_owned());
log.append(
"procedural_how_to:wikihow_candidate",
wikihow_candidate.clone(),
);
log.append("http_fetch:request", wikihow_api_url.clone());
log.append("procedural_how_to:stage", "web_search".to_owned());
log.append("web_search:request", search_query.clone());
if is_install_procedure && search_query != fallback_query {
log.append("web_search:request", fallback_query.clone());
}
for provider in WEB_SEARCH_PROVIDERS {
log.append("web_search:provider", (*provider).to_owned());
}
log.append("web_search:combined", format!("rrf:k={WEB_SEARCH_RRF_K}"));
log.append(
"procedural_how_to:stage",
"recursive_fetch_check".to_owned(),
);
log.append(
"procedural_how_to:source_gate",
"explicit_steps_only".to_owned(),
);
let provider_summary = WEB_SEARCH_PROVIDERS.join(", ");
let body = render_procedural_how_to_body(
&task,
&wikihow_candidate,
&wikihow_api_url,
&search_query,
&fallback_query,
&provider_summary,
detect_language(prompt),
);
Some(finalize_simple(
prompt,
log,
"procedural_how_to",
"response:procedural_how_to",
&body,
0.78,
))
}
/// Reports whether `normalized` parses as a procedural how-to request.
pub fn looks_like_procedural_how_to(normalized: &str) -> bool {
    let parsed = extract_procedural_how_to_task(normalized);
    parsed.is_some()
}
/// Handles an elaboration request that follows a procedural how-to exchange.
///
/// Returns `None` unless the current prompt is an elaboration request, the log
/// holds both an assistant turn and a user turn, and that user turn parses as
/// a procedural how-to task. On success the follow-up is logged and the
/// earlier user prompt is answered again through [`try_how_to_procedure`].
pub fn try_procedural_how_to_followup(
    prompt: &str,
    normalized: &str,
    log: &mut EventLog,
) -> Option<SymbolicAnswer> {
    // Prefer a fresh normalization of the raw prompt; fall back to the
    // caller-supplied one when that comes out empty.
    let renormalized = normalize_prompt(prompt);
    let request: &str = if renormalized.is_empty() {
        normalized
    } else {
        &renormalized
    };
    if !is_procedural_elaboration_request(request) {
        return None;
    }

    // Only the existence of an assistant turn matters here.
    last_assistant_turn(log)?;
    let earlier_prompt = last_user_turn(log)?.to_owned();
    let earlier_normalized = normalize_prompt(&earlier_prompt);
    let earlier_task = extract_procedural_how_to_task(&earlier_normalized)?;

    log.append("procedural_how_to:followup", request.to_owned());
    log.append("procedural_how_to:followup_task", earlier_task.task);
    try_how_to_procedure(&earlier_prompt, &earlier_normalized, log)
}
/// Returns `true` when any lexicon meaning with the procedural-elaboration
/// role is evidenced in `normalized`.
fn is_procedural_elaboration_request(normalized: &str) -> bool {
    for meaning in seed::lexicon().meanings_with_role(seed::ROLE_PROCEDURAL_ELABORATION) {
        if meaning.evidenced_in(normalized) {
            return true;
        }
    }
    false
}
pub fn try_how_it_works(
prompt: &str,
normalized: &str,
log: &mut EventLog,
) -> Option<SymbolicAnswer> {
let query = extract_how_it_works_query(prompt, normalized)?;
log.append("followup:how_it_works", normalized.to_owned());
if let Some(ref term) = query.subject {
log.append(
"followup:subject",
format!("inline:{}", term.to_lowercase()),
);
let concept_prompt = format!("what is {term}");
if let Some(concept_query) = extract_concept_query(&concept_prompt) {
if lookup_concept_query(&concept_query).is_some() {
return try_concept_lookup(&concept_prompt, log);
}
}
record_mechanism_query(log, term);
let body = render_mechanism_discovery_answer(term, detect_language(prompt));
return Some(finalize_simple(
prompt,
log,
"how_it_works",
"response:how_it_works",
&body,
0.68,
));
}
if let Some(prior) = last_assistant_turn(log).map(str::to_owned) {
log.append("followup:prior_turn", "assistant".to_owned());
if let Some(term) = extract_topic_from_prior_reply(&prior) {
use crate::concepts::{extract_concept_query, lookup_concept_query};
if let Some(query) = extract_concept_query(&format!("what is {term}")) {
if lookup_concept_query(&query).is_some() {
log.append("followup:subject", format!("prior_reply:{term}"));
return try_concept_lookup(&format!("what is {term}"), log);
}
}
let body = format!(
"To explain how {term} works: I know the term from the prior conversation \
but do not have a detailed symbolic rule for it yet. Add a Links Notation \
fact with the mechanism description, then ask again."
);
log.append("followup:subject", format!("prior_reply_no_record:{term}"));
return Some(finalize_simple(
prompt,
log,
"concept_elaboration_missing",
"response:concept_elaboration_missing",
&body,
0.3,
));
}
}
let body = String::from(
"I answered that way because the prompt matched a deterministic Links Notation rule. \
To ask about a specific topic, try \"how does X work?\" where X is a concept I know \
(e.g. \"how does Wikipedia work?\"). The evidence and trace events are appended to \
the log; see the trace link for the full chain.",
);
Some(finalize_simple(
prompt,
log,
"meta_explanation",
"response:meta_explanation",
&body,
0.5,
))
}
/// Parses the raw `prompt` into a [`HowItWorksQuery`].
///
/// Returns `None` when the cleaned prompt is empty or matches no mechanism
/// question form. A bare mechanism question yields a query without a subject.
/// `_normalized` is accepted but not used.
fn extract_how_it_works_query(prompt: &str, _normalized: &str) -> Option<HowItWorksQuery> {
    let fragment = clean_mechanism_fragment(prompt);
    if fragment.is_empty() {
        return None;
    }
    let lowered = fragment.to_lowercase();
    let subject = if is_bare_how_it_works(&lowered) {
        None
    } else {
        Some(extract_how_it_works_subject(&fragment, &lowered)?)
    };
    Some(HowItWorksQuery { subject })
}
/// Returns `true` when `lower` is a bare mechanism-inquiry phrase, or such a
/// phrase followed by a space and further text.
fn is_bare_how_it_works(lower: &str) -> bool {
    let forms = seed::lexicon().role_word_forms(seed::ROLE_MECHANISM_INQUIRY);
    for form in &forms {
        if form.slot() != seed::Slot::Bare {
            continue;
        }
        // An empty remainder means an exact match; otherwise the phrase must
        // be followed by a space so that partial words are not accepted.
        let matched = match lower.strip_prefix(form.text.as_str()) {
            Some(rest) => rest.is_empty() || rest.starts_with(' '),
            None => false,
        };
        if matched {
            return true;
        }
    }
    false
}
/// Pulls the subject out of a mechanism question.
///
/// Forms are tried by slot kind in a fixed order: prefix forms first (the
/// first hit is passed through [`strip_mechanism_tail`] and that result is
/// final), then circumfix forms, then suffix forms.
fn extract_how_it_works_subject(original: &str, lower: &str) -> Option<String> {
    let forms = seed::lexicon().role_word_forms(seed::ROLE_MECHANISM_INQUIRY);

    let prefix_hit = forms
        .iter()
        .filter(|form| form.slot() == seed::Slot::Prefix)
        .find_map(|form| subject_after_prefix(original, lower, form.before_slot()));
    if let Some(subject) = prefix_hit {
        return strip_mechanism_tail(&subject);
    }

    let circumfix_hit = forms
        .iter()
        .filter(|form| form.slot() == seed::Slot::Circumfix)
        .find_map(|form| {
            subject_between(original, lower, form.before_slot(), &[form.after_slot()])
        });
    if circumfix_hit.is_some() {
        return circumfix_hit;
    }

    let suffix_hit = forms
        .iter()
        .filter(|form| form.slot() == seed::Slot::Suffix)
        .find_map(|form| subject_before_suffix(original, lower, form.after_slot()));
    suffix_hit
}
fn subject_after_prefix(original: &str, lower: &str, prefix: &str) -> Option<String> {
lower.strip_prefix(prefix)?;
let rest = original.get(prefix.len()..)?;
clean_mechanism_subject(rest)
}
/// Returns the cleaned subject that precedes `suffix`.
///
/// The suffix is matched against `lower`, while the subject is sliced from
/// `original` so its casing is preserved. Yields `None` when the suffix is
/// absent, the slice is not valid in `original`, or the remainder is not a
/// usable subject.
fn subject_before_suffix(original: &str, lower: &str, suffix: &str) -> Option<String> {
    if !lower.ends_with(suffix) {
        return None;
    }
    let cut = original.len().checked_sub(suffix.len())?;
    let head = original.get(..cut)?;
    clean_mechanism_subject(head)
}
/// Returns the cleaned subject enclosed by `prefix` and the first of
/// `suffixes` that `lower` ends with.
///
/// Matching is done on `lower`; the subject is sliced from `original`. Only
/// the first matching suffix is considered. Yields `None` when the prefix or
/// every suffix is missing, when nothing lies between them, or when the
/// enclosed text is not a usable subject.
fn subject_between(original: &str, lower: &str, prefix: &str, suffixes: &[&str]) -> Option<String> {
    lower.strip_prefix(prefix)?;
    let suffix = suffixes
        .iter()
        .find(|candidate| lower.ends_with(**candidate))?;
    let start = prefix.len();
    let end = original.len().checked_sub(suffix.len())?;
    if end <= start {
        return None;
    }
    let enclosed = original.get(start..end)?;
    clean_mechanism_subject(enclosed)
}
/// Cleans `subject` and removes a trailing mechanism predicate from it.
///
/// The subject is first passed through [`clean_mechanism_subject`]. If its
/// lowercased form ends with the suffix of any mechanism-predicate form, that
/// many bytes are cut from the end and the remainder is cleaned again; only
/// the first matching form is applied. Without a match the cleaned subject is
/// returned as is.
///
/// Returns `None` when the subject (before or after stripping) is not usable,
/// or when the cut position is not a valid slice point in the subject.
fn strip_mechanism_tail(subject: &str) -> Option<String> {
    let clean = clean_mechanism_subject(subject)?;
    let lower = clean.to_lowercase();
    for form in &seed::lexicon().role_word_forms(seed::ROLE_MECHANISM_PREDICATE) {
        let suffix = form.after_slot();
        if lower.ends_with(suffix) {
            let end = clean.len().checked_sub(suffix.len())?;
            // Lowercasing can change byte lengths, so `end` is not guaranteed
            // to sit on a char boundary of `clean`. `String::truncate` would
            // panic there; `get` turns that case into `None`, matching the
            // sibling `subject_*` helpers.
            let head = clean.get(..end)?;
            return clean_mechanism_subject(head);
        }
    }
    Some(clean)
}
/// Strips surrounding noise from a mechanism prompt or subject fragment.
///
/// In order: trims whitespace, trims quote/bracket characters from both ends,
/// trims trailing sentence punctuation (ASCII `? . ! , ; :`, the fullwidth
/// question mark and the ideographic full stop), then trims whitespace again.
/// Returns the result as an owned string, which may be empty.
fn clean_mechanism_fragment(value: &str) -> String {
    value
        .trim()
        .trim_matches(|character: char| {
            matches!(
                character,
                '`' | '"' | '\'' | '«' | '»' | '<' | '>' | '(' | ')' | '[' | ']' | '{' | '}'
            )
        })
        // `\u{FF1F}` is the fullwidth question mark. It is written as an
        // escape so it cannot be confused with (or normalized into) ASCII `?`.
        .trim_end_matches(['?', '\u{FF1F}', '。', '.', '!', ',', ';', ':'])
        .trim()
        .to_owned()
}
/// Normalizes a candidate subject of a mechanism question.
///
/// The value is cleaned with [`clean_mechanism_fragment`]. Then, for each
/// detail-modifier form in lexicon order, a matching trailing suffix (compared
/// case-insensitively via lowercasing) is cut off and the remainder is cleaned
/// again.
///
/// Returns `None` when the result is empty, when it is a non-referential
/// subject (equal to a bare form or starting with a prefix form of that role),
/// or when a cut position is not a valid slice point in the subject.
fn clean_mechanism_subject(value: &str) -> Option<String> {
    let mut clean = clean_mechanism_fragment(value);
    for form in &seed::lexicon().role_word_forms(seed::ROLE_DETAIL_MODIFIER) {
        let suffix = form.after_slot();
        // Recomputed per form because `clean` may have been shortened.
        let lower = clean.to_lowercase();
        if lower.ends_with(suffix) {
            let end = clean.len().checked_sub(suffix.len())?;
            // Lowercasing can change byte lengths, so `end` may fall inside a
            // multi-byte character of `clean`. `String::truncate` would panic
            // there; `get` reports it as `None` instead.
            clean = clean_mechanism_fragment(clean.get(..end)?);
        }
    }
    let lower = clean.to_lowercase();
    let non_referential = seed::lexicon()
        .role_word_forms(seed::ROLE_NON_REFERENTIAL_SUBJECT)
        .iter()
        .any(|form| match form.slot() {
            seed::Slot::Bare => lower == form.text,
            seed::Slot::Prefix => lower.starts_with(form.before_slot()),
            _ => false,
        });
    if clean.is_empty() || non_referential {
        return None;
    }
    Some(clean)
}
/// Appends the planned lookup trace for a mechanism question about `subject`
/// to `log`: the request, the three lookup stages, the web search query and
/// providers, the rank-fusion setting, and the source gate.
fn record_mechanism_query(log: &mut EventLog, subject: &str) {
    log.append("mechanism_query:request", subject.to_owned());

    log.append("mechanism_query:stage", String::from("wikipedia"));
    log.append("mechanism_query:stage", String::from("wikidata"));
    log.append("mechanism_query:stage", String::from("web_search"));

    let search_query = format!("how {subject} works");
    log.append("web_search:request", search_query);
    for provider in WEB_SEARCH_PROVIDERS {
        log.append("web_search:provider", (*provider).to_owned());
    }
    let fusion = format!("rrf:k={WEB_SEARCH_RRF_K}");
    log.append("web_search:combined", fusion);

    log.append(
        "mechanism_query:source_gate",
        String::from("source_backed_mechanism_only"),
    );
}
/// Renders the "mechanism discovery plan" answer text for `subject`.
///
/// The text describes the lookup order (Wikipedia, Wikidata, then web search
/// across the configured providers) and is selected by `language`.
/// `Language::Unknown` shares the English text.
fn render_mechanism_discovery_answer(subject: &str, language: Language) -> String {
// Human-readable provider list interpolated into every variant.
let provider_summary = WEB_SEARCH_PROVIDERS.join(", ");
match language {
// Fully localized Russian text.
Language::Russian => format!(
"План поиска механизма для `{subject}`.\n\n\
Я не отвечаю на это из зашитого факта. Решатель трактует запрос \
как вопрос о том, как устроен или работает `{subject}`: сначала \
проверяет Wikipedia для обзорного источника, затем Wikidata для \
связей сущности, затем веб-поиск через {provider_summary}. Если \
источники не объясняют механизм, ответ должен попросить источник \
или более узкий термин, а не выдумывать детали."
),
// Only the heading is in Hindi; the explanatory paragraph is English.
Language::Hindi => format!(
"`{subject}` के लिए mechanism discovery plan.\n\n\
I do not answer this from a memoized fact. The solver treats the \
prompt as a question about how `{subject}` works, checks Wikipedia \
for a source-backed overview, Wikidata for entity relationships, \
then web search across {provider_summary}. If no source explains \
the mechanism, it should ask for a source or a narrower term."
),
// Only the heading is in Chinese; the explanatory paragraph is English.
Language::Chinese => format!(
"`{subject}` 的机制发现计划。\n\n\
I do not answer this from a memoized fact. The solver treats the \
prompt as a question about how `{subject}` works, checks Wikipedia \
for a source-backed overview, Wikidata for entity relationships, \
then web search across {provider_summary}. If no source explains \
the mechanism, it should ask for a source or a narrower term."
),
// English is also the fallback for an undetected language.
Language::English | Language::Unknown => format!(
"Mechanism discovery plan for `{subject}`.\n\n\
I do not answer this from a memoized fact. The solver treats the \
prompt as a question about how `{subject}` works, checks Wikipedia \
for a source-backed overview, Wikidata for entity relationships, \
then web search across {provider_summary}. If no source explains \
the mechanism, it should ask for a source or a narrower term \
instead of inventing details."
),
}
}
/// Parses `normalized` into a procedural how-to task.
///
/// The first procedural-request form whose lead-in prefixes the cleaned prompt
/// decides the outcome: the remainder becomes the task, and a non-empty form
/// action overrides the action. When no form matches, the elided-lead forms
/// are tried instead.
fn extract_procedural_how_to_task(normalized: &str) -> Option<ProceduralHowToTask> {
    let cleaned = clean_procedural_fragment(normalized);
    let forms = seed::lexicon().role_word_forms(seed::ROLE_PROCEDURAL_REQUEST);
    let explicit = forms.iter().find_map(|form| {
        cleaned
            .strip_prefix(form.before_slot())
            .map(|rest| (form, rest))
    });
    match explicit {
        Some((form, rest)) => {
            let action_override = if form.action.is_empty() {
                None
            } else {
                Some(form.action.as_str())
            };
            build_procedural_task(rest, action_override)
        }
        None => extract_elided_procedural_how_to_task(&cleaned),
    }
}
/// Parses a how-to request whose lead-in is elided.
///
/// Each elided-lead form is tried in lexicon order. A form is accepted only
/// when its lead-in prefixes the prompt, the remaining task is non-empty,
/// the task starts with a known action verb, and a non-empty object follows.
/// The first accepted form wins; `None` when none is accepted.
fn extract_elided_procedural_how_to_task(clean_prompt: &str) -> Option<ProceduralHowToTask> {
    let forms = seed::lexicon().role_word_forms(seed::ROLE_PROCEDURAL_REQUEST_ELIDED_LEAD);
    let parsed = forms.iter().find_map(|form| {
        let rest = clean_prompt.strip_prefix(form.before_slot())?;
        let raw_task = clean_procedural_fragment(rest);
        if raw_task.is_empty() {
            return None;
        }
        let (task, corrections) = correct_common_procedural_typos(&raw_task);
        let (action, object) = split_known_procedural_action_object(&task)?;
        if object.is_empty() {
            return None;
        }
        Some(ProceduralHowToTask {
            task,
            action,
            object,
            corrections,
        })
    });
    parsed
}
/// Builds the primary web search query for `task`.
///
/// Install tasks search for official documentation of the object (or of the
/// whole task when the object is empty); every other task uses the generic
/// "how to ..." query.
fn procedural_search_query(task: &ProceduralHowToTask) -> String {
    if task.action != "install" {
        return format!("how to {}", task.task);
    }
    let target: &str = if task.object.is_empty() {
        &task.task
    } else {
        &task.object
    };
    format!("{target} install official documentation")
}
/// Splits `task` into `(action, object)` using the lexicon's action verbs.
///
/// Verbs are tried longest first (by character count). A verb matches when
/// its trimmed surface prefixes the task and is followed by nothing, by
/// whitespace, or the surface itself contains CJK text. The action is the
/// form's action when set, otherwise the surface; the object is the cleaned
/// remainder. `None` when no verb matches.
fn split_known_procedural_action_object(task: &str) -> Option<(String, String)> {
    let mut verbs = seed::lexicon().role_word_forms(seed::ROLE_PROCEDURAL_ACTION_VERB);
    verbs.sort_by_key(|verb| std::cmp::Reverse(verb.text.chars().count()));

    verbs.into_iter().find_map(|verb| {
        let surface = verb.text.trim();
        if surface.is_empty() {
            return None;
        }
        let rest = task.strip_prefix(surface)?;
        // Reject matches that stop in the middle of a word, except for CJK
        // surfaces, which are allowed to run straight into the object.
        let ends_at_boundary = rest.is_empty()
            || rest.starts_with(char::is_whitespace)
            || crate::coding::contains_cjk(surface);
        if !ends_at_boundary {
            return None;
        }
        let action = if verb.action.is_empty() {
            surface.to_owned()
        } else {
            verb.action.clone()
        };
        Some((action, clean_procedural_fragment(rest)))
    })
}
/// Builds a [`ProceduralHowToTask`] from the raw task text.
///
/// The text is cleaned and typo-corrected. With `action_override` the action
/// is the override and the object is the whole task; otherwise the task is
/// split into its leading word and the rest. `None` when the cleaned task is
/// empty or cannot be split.
fn build_procedural_task(
    raw_task: &str,
    action_override: Option<&str>,
) -> Option<ProceduralHowToTask> {
    let cleaned = clean_procedural_fragment(raw_task);
    if cleaned.is_empty() {
        return None;
    }
    let (task, corrections) = correct_common_procedural_typos(&cleaned);
    let (action, object) = match action_override {
        Some(forced) => (forced.to_owned(), task.clone()),
        None => split_procedural_action_object(&task)?,
    };
    Some(ProceduralHowToTask {
        task,
        action,
        object,
        corrections,
    })
}
/// Splits `task` at its first whitespace into `(action, object)`.
///
/// The action is the text before the first whitespace character and the
/// object is the trimmed remainder (empty when there is none). Returns `None`
/// when the action would be empty, i.e. for an empty task or one that starts
/// with whitespace.
fn split_procedural_action_object(task: &str) -> Option<(String, String)> {
    let (head, tail) = task.split_once(char::is_whitespace).unwrap_or((task, ""));
    let action = head.trim();
    if action.is_empty() {
        return None;
    }
    Some((action.to_owned(), tail.trim().to_owned()))
}
/// Strips surrounding noise from a procedural prompt or task fragment.
///
/// Trims whitespace, then quote/backtick/space characters from both ends, then
/// trailing ASCII sentence punctuation, then whitespace again. Finally the
/// first task-modifier suffix that matches is removed and the result is
/// trimmed once more.
fn clean_procedural_fragment(value: &str) -> String {
    let is_wrapper = |character: char| matches!(character, '`' | '"' | '\'' | ' ');
    let trimmed = value
        .trim()
        .trim_matches(is_wrapper)
        .trim_end_matches(['?', '!', '.', ',', ';', ':'])
        .trim();

    let modifiers = seed::lexicon().role_word_forms(seed::ROLE_PROCEDURAL_TASK_MODIFIER);
    let without_modifier = modifiers
        .iter()
        .find_map(|form| trimmed.strip_suffix(form.after_slot()));
    match without_modifier {
        Some(stripped) => stripped.trim().to_owned(),
        None => trimmed.to_owned(),
    }
}
/// Replaces known misspelled tokens in `task`.
///
/// The task is split on whitespace; each token equal to a common-typo form's
/// text is replaced by that form's action, and all tokens are re-joined with
/// single spaces. Returns the corrected task together with one
/// [`SpellingCorrection`] per distinct misspelling, in first-seen order.
fn correct_common_procedural_typos(task: &str) -> (String, Vec<SpellingCorrection>) {
    let lexicon = seed::lexicon();
    let known_typos = lexicon.role_word_forms(seed::ROLE_COMMON_TYPO);

    let mut corrections: Vec<SpellingCorrection> = Vec::new();
    let mut words: Vec<String> = Vec::new();
    for token in task.split_whitespace() {
        match known_typos.iter().find(|form| token == form.text) {
            Some(form) => {
                // Record each misspelling once, even if it occurs repeatedly.
                let already_recorded = corrections.iter().any(|seen| seen.from == form.text);
                if !already_recorded {
                    corrections.push(SpellingCorrection {
                        from: form.text.clone(),
                        to: form.action.clone(),
                    });
                }
                words.push(form.action.clone());
            }
            None => words.push(token.to_owned()),
        }
    }
    (words.join(" "), corrections)
}
fn wikihow_page_title(task: &str) -> String {
task.split(|character: char| !character.is_alphanumeric())
.filter(|word| !word.is_empty())
.map(capitalize_word)
.collect::<Vec<_>>()
.join("-")
}
/// Renders the "procedural discovery plan" answer text for `task`.
///
/// The text is selected by `language` (`Language::Unknown` shares the English
/// text) and interpolates, in order: the task, its action and object, an
/// optional install-specific paragraph, the wikiHow candidate title and API
/// URL, the primary search query, the provider list, the rank-fusion constant,
/// and the task again.
///
/// `fallback_query` is only mentioned in the install-specific paragraph, which
/// is emitted when the task's action is `"install"` and is empty otherwise.
fn render_procedural_how_to_body(
task: &ProceduralHowToTask,
wikihow_candidate: &str,
wikihow_api_url: &str,
search_query: &str,
fallback_query: &str,
provider_summary: &str,
language: Language,
) -> String {
// Install tasks get an extra leading paragraph about official documentation.
let is_install_procedure = task.action == "install";
match language {
Language::Russian => {
// Install-only paragraph; ends with a space so it joins the main text.
let official_docs_gate = if is_install_procedure {
format!(
"Для задач установки первый source gate ищет официальную \
документацию продукта или официальную страницу установки в \
репозитории, а уже потом переходит к общим how-to источникам. \
Он начинает с web search запроса `{search_query}` и держит \
общий how-to запрос `{fallback_query}` как fallback. "
)
} else {
String::new()
};
// Positional arguments below must stay in the order of the `{}` slots.
format!(
"План поиска процедуры для `{}` (действие `{}`, объект `{}`).\n\n\
{}Я не отвечаю на это как на заученный рецепт. Сначала solver проверяет \
Wikipedia для контекста темы и Wikidata для подсказок сущности, действия \
и объекта. Затем он пробует CORS-readable MediaWiki parse API wikiHow \
для кандидата `{}` через `{}`. Если эти источники не дают пригодные шаги, \
fallback запускает web search по `{}` через {} и объединяет верхние \
результаты reciprocal rank fusion (k = {}). Финальная recursive fetch \
check принимает только страницы с явными упорядоченными или \
инструкционными шагами для `{}`.",
task.task,
task.action,
task.object,
official_docs_gate,
wikihow_candidate,
wikihow_api_url,
search_query,
provider_summary,
WEB_SEARCH_RRF_K,
task.task,
)
}
Language::Hindi => {
// Install-only paragraph; ends with a space so it joins the main text.
let official_docs_gate = if is_install_procedure {
format!(
"इंस्टॉल वाले कामों में पहला source gate उत्पाद की आधिकारिक \
documentation या official repository install page को प्राथमिकता \
देता है; उसके बाद ही community how-to sources देखे जाते हैं. \
Solver official-source web search query `{search_query}` \
पहले चलाता है और general how-to query `{fallback_query}` \
को fallback रखता है. "
)
} else {
String::new()
};
// Positional arguments below must stay in the order of the `{}` slots.
format!(
"`{}` के लिए procedural discovery plan (action `{}`, object `{}`).\n\n\
{}मैं इसे memorized recipe से answer नहीं करता. Solver पहले topic \
context के लिए Wikipedia और entity/action/object hints के लिए \
Wikidata जांचता है. फिर वह candidate `{}` के लिए wikiHow का \
CORS-readable MediaWiki parse API `{}` से आजमाता है. अगर ये \
sources usable steps नहीं देते, fallback `{}` के लिए {} पर \
web search चलाता है और top results को reciprocal rank fusion \
(k = {}) से merge करता है. अंतिम recursive fetch check केवल \
उन pages को स्वीकार करता है जिनमें `{}` के explicit ordered \
या instructional steps हों.",
task.task,
task.action,
task.object,
official_docs_gate,
wikihow_candidate,
wikihow_api_url,
search_query,
provider_summary,
WEB_SEARCH_RRF_K,
task.task,
)
}
Language::Chinese => {
// Install-only paragraph; no trailing space in the Chinese variant.
let official_docs_gate = if is_install_procedure {
format!(
"对于安装类任务,第一个 source gate 优先查找产品官方 documentation \
或官方仓库的安装页面,然后才使用社区 how-to 来源。Solver 先运行官方来源 \
web search query `{search_query}`,并把通用 how-to query \
`{fallback_query}` 保留为 fallback。"
)
} else {
String::new()
};
// Positional arguments below must stay in the order of the `{}` slots.
format!(
"`{}` 的过程发现计划(action `{}`, object `{}`)。\n\n\
{}我不会把它当作记忆中的固定 recipe 来回答。Solver 先检查 Wikipedia \
获取主题上下文,再检查 Wikidata 获取 entity/action/object 线索。然后它尝试 \
wikiHow 的 CORS-readable MediaWiki parse API candidate `{}`,URL 为 `{}`。\
如果这些来源没有可用步骤,fallback 会对 `{}` 通过 {} 运行 web search,\
并用 reciprocal rank fusion (k = {}) 合并顶部结果。最后的 recursive \
fetch check 只接受真正包含 `{}` 的明确有序步骤或 instructional steps 的页面。",
task.task,
task.action,
task.object,
official_docs_gate,
wikihow_candidate,
wikihow_api_url,
search_query,
provider_summary,
WEB_SEARCH_RRF_K,
task.task,
)
}
// English is also the fallback for an undetected language.
Language::English | Language::Unknown => {
// Install-only paragraph; ends with a space so it joins the main text.
let official_docs_gate = if is_install_procedure {
format!(
"For install tasks, the first source gate prefers the product's \
official documentation or official repository install page \
before community how-to sources. It starts with the \
official-source web search query `{search_query}` and keeps \
the general how-to query `{fallback_query}` as fallback. "
)
} else {
String::new()
};
// Positional arguments below must stay in the order of the `{}` slots.
format!(
"Procedural discovery plan for `{}` (action `{}`, object `{}`).\n\n\
{}I do not answer this from a memoized recipe. The solver first checks \
Wikipedia for topic context and Wikidata for entity/action/object hints. \
It then tries wikiHow's CORS-readable MediaWiki parse API candidate \
`{}` via `{}`. If those sources do not expose usable steps, the fallback \
path runs web search for `{}` across {} and merges the top results with \
reciprocal rank fusion (k = {}). The final recursive fetch check only \
accepts pages that actually contain explicit ordered or instructional \
steps for `{}`.",
task.task,
task.action,
task.object,
official_docs_gate,
wikihow_candidate,
wikihow_api_url,
search_query,
provider_summary,
WEB_SEARCH_RRF_K,
task.task,
)
}
}
}
/// Returns `word` with its first character uppercased and the rest unchanged.
///
/// Uppercasing follows Unicode rules, so one character may expand into
/// several. An empty word yields an empty string.
fn capitalize_word(word: &str) -> String {
    match word.chars().next() {
        None => String::new(),
        Some(initial) => {
            let tail = &word[initial.len_utf8()..];
            let mut capitalized = String::with_capacity(word.len());
            capitalized.extend(initial.to_uppercase());
            capitalized.push_str(tail);
            capitalized
        }
    }
}
/// Guesses the topic of a previous assistant reply, lowercased.
///
/// If the first line contains `(` with non-blank text before it, that text is
/// the topic. Otherwise the first whitespace-separated word of the whole reply
/// that, once stripped of surrounding non-alphanumeric characters, is at least
/// two bytes long, starts with an uppercase character, and is not a
/// topic-scan stop word is used. `None` when neither rule finds a topic.
fn extract_topic_from_prior_reply(reply: &str) -> Option<String> {
    let headline = reply.lines().next().unwrap_or("").trim();
    let before_paren = headline
        .split_once('(')
        .map(|(lead, _)| lead.trim())
        .filter(|lead| !lead.is_empty());
    if let Some(lead) = before_paren {
        return Some(lead.to_lowercase());
    }

    let stop_words = seed::lexicon().role_word_forms(seed::ROLE_TOPIC_SCAN_STOP_WORD);
    let topic = reply
        .split_whitespace()
        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
        .filter(|word| word.len() >= 2 && word.starts_with(char::is_uppercase))
        .map(str::to_lowercase)
        .find(|lowered| !stop_words.iter().any(|form| form.text == *lowered));
    topic
}