use std::collections::{HashMap, HashSet};
use anyhow::Result;
use serde::{Deserialize, Serialize};
use tantivy::schema::Value;
use crate::engine::EngineState;
use crate::graph::{self, GraphFilter};
use crate::search;
use crate::slug::{Slug, WikiUri};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Suggestion {
pub slug: String,
pub uri: String,
pub title: String,
pub r#type: String,
pub score: f32,
pub reason: String,
pub field: String,
}
pub fn suggest(
engine: &EngineState,
slug_or_uri: &str,
wiki_flag: Option<&str>,
limit: Option<usize>,
) -> Result<Vec<Suggestion>> {
let (wiki_name, slug) = if slug_or_uri.starts_with("wiki://") {
let (entry, slug) = WikiUri::resolve(slug_or_uri, wiki_flag, &engine.config)?;
(entry.name, slug)
} else {
let wiki_name = engine.resolve_wiki_name(wiki_flag).to_string();
let slug = Slug::try_from(slug_or_uri)?;
(wiki_name, slug)
};
let space = engine.space(&wiki_name)?;
let resolved = space.resolved_config(&engine.config);
let limit = limit.unwrap_or(resolved.suggest.default_limit as usize);
let min_score = resolved.suggest.min_score;
let searcher = space.index_manager.searcher()?;
let is = &space.index_schema;
let input_doc = find_doc_by_slug(&searcher, is, slug.as_str())?;
let input_tags: HashSet<String> = input_doc.tags.iter().cloned().collect();
let input_type = input_doc.page_type.clone();
let existing_links: HashSet<String> = input_doc.links.iter().cloned().collect();
let mut candidates: HashMap<String, CandidateScore> = HashMap::new();
for tag in &input_tags {
let results = search::search(
tag,
&search::SearchOptions {
no_excerpt: true,
top_k: 20,
..Default::default()
},
&searcher,
&wiki_name,
is,
)?;
for r in &results.results {
if r.slug == slug.as_str() || existing_links.contains(&r.slug) {
continue;
}
let doc = find_doc_by_slug(&searcher, is, &r.slug)?;
let shared: usize = doc.tags.iter().filter(|t| input_tags.contains(*t)).count();
if shared == 0 {
continue;
}
let total = doc.tags.len().max(1);
let score = shared as f32 / total as f32;
let shared_tags: Vec<&str> = doc
.tags
.iter()
.filter(|t| input_tags.contains(*t))
.map(|s| s.as_str())
.collect();
let reason = format!("shares tags: {}", shared_tags.join(", "));
candidates
.entry(r.slug.clone())
.and_modify(|c| {
if score > c.score {
c.score = score;
c.reason = reason.clone();
}
})
.or_insert(CandidateScore {
slug: r.slug.clone(),
title: r.title.clone(),
page_type: doc.page_type.clone(),
score,
reason,
});
}
}
let wiki_graph =
graph::build_graph(&searcher, is, &GraphFilter::default(), &space.type_registry)?;
let slug_to_idx: HashMap<&str, _> = wiki_graph
.node_indices()
.map(|idx| (wiki_graph[idx].slug.as_str(), idx))
.collect();
if let Some(&root_idx) = slug_to_idx.get(slug.as_str()) {
let mut hop1: HashSet<petgraph::graph::NodeIndex> = HashSet::new();
for neighbor in wiki_graph.neighbors_undirected(root_idx) {
hop1.insert(neighbor);
}
for &n1 in &hop1 {
for n2 in wiki_graph.neighbors_undirected(n1) {
if n2 == root_idx || hop1.contains(&n2) {
continue;
}
let node = &wiki_graph[n2];
if existing_links.contains(&node.slug) {
continue;
}
let via = &wiki_graph[n1].slug;
let score = 0.5; let reason = format!("2 hops via {via}");
candidates
.entry(node.slug.clone())
.and_modify(|c| {
if score > c.score {
c.score = score;
c.reason = reason.clone();
}
})
.or_insert(CandidateScore {
slug: node.slug.clone(),
title: node.title.clone(),
page_type: node.r#type.clone(),
score,
reason,
});
}
}
}
let query = format!("{} {}", input_doc.title, input_doc.summary);
if !query.trim().is_empty() {
let results = search::search(
&query,
&search::SearchOptions {
no_excerpt: true,
top_k: 10,
..Default::default()
},
&searcher,
&wiki_name,
is,
)?;
let max_score = results
.results
.first()
.map(|r| r.score)
.unwrap_or(1.0)
.max(0.001);
for r in &results.results {
if r.slug == slug.as_str() || existing_links.contains(&r.slug) {
continue;
}
let score = r.score / max_score * 0.7; let reason = "similar content".to_string();
candidates
.entry(r.slug.clone())
.and_modify(|c| {
if score > c.score {
c.score = score;
c.reason = reason.clone();
}
})
.or_insert_with(|| {
let doc = find_doc_by_slug(&searcher, is, &r.slug).unwrap_or_default();
CandidateScore {
slug: r.slug.clone(),
title: r.title.clone(),
page_type: doc.page_type,
score,
reason,
}
});
}
}
if let Some(community_map) =
graph::node_community_map(&wiki_graph, resolved.graph.min_nodes_for_communities)
&& let Some(&my_community) = community_map.get(slug.as_str())
{
let mut peers: Vec<&str> = community_map
.keys()
.filter(|s| {
let ns: &str = s;
community_map.get(ns).copied() == Some(my_community)
&& ns != slug.as_str()
&& !existing_links.contains(ns)
&& !candidates.contains_key(ns)
})
.map(|s| s.as_str())
.collect();
peers.sort_unstable();
for (added, node_slug) in peers.into_iter().enumerate() {
if added >= resolved.graph.community_suggestions_limit {
break;
}
let doc = find_doc_by_slug(&searcher, is, node_slug)?;
candidates.insert(
node_slug.to_string(),
CandidateScore {
slug: node_slug.to_string(),
title: doc.title.clone(),
page_type: doc.page_type.clone(),
score: 0.4,
reason: "same knowledge cluster".to_string(),
},
);
}
}
let mut ranked: Vec<CandidateScore> = candidates.into_values().collect();
ranked.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
ranked.retain(|c| c.score >= min_score);
ranked.truncate(limit);
let suggestions = ranked
.into_iter()
.map(|c| {
let field = suggest_field(&input_type, &c.page_type, &space.type_registry);
Suggestion {
uri: format!("wiki://{wiki_name}/{}", c.slug),
slug: c.slug,
title: c.title,
r#type: c.page_type,
score: (c.score * 100.0).round() / 100.0,
reason: c.reason,
field,
}
})
.collect();
Ok(suggestions)
}
#[derive(Default)]
struct DocInfo {
title: String,
summary: String,
page_type: String,
tags: Vec<String>,
links: Vec<String>,
}
struct CandidateScore {
slug: String,
title: String,
page_type: String,
score: f32,
reason: String,
}
fn find_doc_by_slug(
searcher: &tantivy::Searcher,
is: &crate::index_schema::IndexSchema,
slug: &str,
) -> Result<DocInfo> {
let f_slug = is.field("slug");
let f_title = is.field("title");
let f_type = is.field("type");
let query = tantivy::query::TermQuery::new(
tantivy::Term::from_field_text(f_slug, slug),
tantivy::schema::IndexRecordOption::Basic,
);
let results = searcher.search(
&query,
&tantivy::collector::TopDocs::with_limit(1).order_by_score(),
)?;
if let Some((_score, addr)) = results.first() {
let doc: tantivy::TantivyDocument = searcher.doc(*addr)?;
let title = doc
.get_first(f_title)
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let page_type = doc
.get_first(f_type)
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let summary = is
.try_field("summary")
.and_then(|f| doc.get_first(f))
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let tags: Vec<String> = is
.try_field("tags")
.map(|f| {
doc.get_all(f)
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect()
})
.unwrap_or_default();
let mut links = Vec::new();
for field_name in &["sources", "concepts", "body_links", "document_refs"] {
if let Some(f) = is.try_field(field_name) {
for val in doc.get_all(f) {
if let Some(s) = val.as_str() {
links.push(s.to_string());
}
}
}
}
Ok(DocInfo {
title,
summary,
page_type,
tags,
links,
})
} else {
Ok(DocInfo::default())
}
}
fn suggest_field(
page_type: &str,
candidate_type: &str,
registry: &crate::type_registry::SpaceTypeRegistry,
) -> String {
let source_types = [
"paper",
"article",
"documentation",
"clipping",
"transcript",
"note",
"data",
"book-chapter",
"thread",
];
let is_source = |t: &str| source_types.contains(&t);
for edge in registry.edges(page_type) {
let targets = &edge.target_types;
if targets.iter().any(|t| t == candidate_type) {
return edge.field.clone();
}
if is_source(candidate_type) && targets.iter().any(|t| is_source(t)) {
return edge.field.clone();
}
}
"[[wikilink]]".to_string()
}