use std::collections::{HashMap, HashSet};

use anyhow::Result;
use serde::{Deserialize, Serialize};
use tantivy::schema::Value;

use crate::engine::EngineState;
use crate::graph::{GraphFilter, get_cached_community_map, get_or_build_graph};
use crate::search;
use crate::slug::{Slug, WikiUri};

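/// A single ranked link suggestion returned by [`suggest`].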
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Suggestion {
    /// Slug of the suggested page.
    pub slug: String,
    /// Full `wiki://` URI of the suggested page.
    pub uri: String,
    /// Title of the suggested page.
    pub title: String,
    /// Page type of the suggested page.
    pub r#type: String,
    /// Combined relevance score, rounded to two decimal places.
    pub score: f32,
    /// Human-readable explanation of why this page was suggested.
    pub reason: String,
    /// Frontmatter field the new link would belong in.
    pub field: String,
}

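/// Suggests pages worth linking from the given page.
///
/// Combines four signals: shared tags, two-hop link-graph neighbors, content
/// similarity, and shared community membership. The page itself and pages it
/// already links to are never suggested.
///
/// Illustrative usage (the slug is a made-up example; assumes an already
/// initialized `EngineState`):
///
/// ```ignore
/// let suggestions = suggest(&engine, "rust-ownership", None, Some(5))?;
/// for s in &suggestions {
///     println!("{} ({:.2}): {}", s.title, s.score, s.reason);
/// }
/// ```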
pub fn suggest(
    engine: &EngineState,
    slug_or_uri: &str,
    wiki_flag: Option<&str>,
    limit: Option<usize>,
) -> Result<Vec<Suggestion>> {
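    // Accept either a full wiki:// URI or a bare slug; resolve the wiki name either way.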
    let (wiki_name, slug) = if slug_or_uri.starts_with("wiki://") {
        let (entry, slug) = WikiUri::resolve(slug_or_uri, wiki_flag, &engine.config)?;
        (entry.name, slug)
    } else {
        let wiki_name = engine.resolve_wiki_name(wiki_flag).to_string();
        let slug = Slug::try_from(slug_or_uri)?;
        (wiki_name, slug)
    };

    let space = engine.space(&wiki_name)?;
    let resolved = space.resolved_config(&engine.config);
    let limit = limit.unwrap_or(resolved.suggest.default_limit as usize);
    let min_score = resolved.suggest.min_score;

    let searcher = space.index_manager.searcher()?;
    let is = &space.index_schema;

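    // Load the input page's tags, type, and existing outgoing links; already-linked
    // pages are filtered out of every signal below.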
    let input_doc = find_doc_by_slug(&searcher, is, slug.as_str())?;
    let input_tags: HashSet<String> = input_doc.tags.iter().cloned().collect();
    let input_type = input_doc.page_type.clone();
    let existing_links: HashSet<String> = input_doc.links.iter().cloned().collect();

    let mut candidates: HashMap<String, CandidateScore> = HashMap::new();

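    // Signal 1: tag overlap. Search for each of the input's tags and score each
    // candidate by the fraction of its own tags shared with the input.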
    for tag in &input_tags {
        let results = search::search(
            tag,
            &search::SearchOptions {
                no_excerpt: true,
                top_k: 20,
                ..Default::default()
            },
            &searcher,
            &wiki_name,
            is,
        )?;
        for r in &results.results {
            if r.slug == slug.as_str() || existing_links.contains(&r.slug) {
                continue;
            }
            let doc = find_doc_by_slug(&searcher, is, &r.slug)?;
            let shared_tags: Vec<&str> = doc
                .tags
                .iter()
                .filter(|t| input_tags.contains(*t))
                .map(|s| s.as_str())
                .collect();
            if shared_tags.is_empty() {
                continue;
            }
            let total = doc.tags.len().max(1);
            let score = shared_tags.len() as f32 / total as f32;
            let reason = format!("shares tags: {}", shared_tags.join(", "));
            candidates
                .entry(r.slug.clone())
                .and_modify(|c| {
                    if score > c.score {
                        c.score = score;
                        c.reason = reason.clone();
                    }
                })
                .or_insert(CandidateScore {
                    slug: r.slug.clone(),
                    title: r.title.clone(),
                    page_type: doc.page_type.clone(),
                    score,
                    reason,
                });
        }
    }

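    // Signal 2: graph proximity. Pages exactly two hops away in the link graph
    // score a flat 0.5.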
    let wiki_graph = get_or_build_graph(
        is,
        &space.type_registry,
        &space.index_manager,
        &space.graph_cache,
        &searcher,
        &GraphFilter::default(),
    )?;
    let slug_to_idx: HashMap<&str, _> = wiki_graph
        .node_indices()
        .map(|idx| (wiki_graph[idx].slug.as_str(), idx))
        .collect();

    if let Some(&root_idx) = slug_to_idx.get(slug.as_str()) {
        let hop1: HashSet<petgraph::graph::NodeIndex> =
            wiki_graph.neighbors_undirected(root_idx).collect();
        for &n1 in &hop1 {
            for n2 in wiki_graph.neighbors_undirected(n1) {
                if n2 == root_idx || hop1.contains(&n2) {
                    continue;
                }
                let node = &wiki_graph[n2];
                if existing_links.contains(&node.slug) {
                    continue;
                }
                let via = &wiki_graph[n1].slug;
                let score = 0.5;
                let reason = format!("2 hops via {via}");
                candidates
                    .entry(node.slug.clone())
                    .and_modify(|c| {
                        if score > c.score {
                            c.score = score;
                            c.reason = reason.clone();
                        }
                    })
                    .or_insert(CandidateScore {
                        slug: node.slug.clone(),
                        title: node.title.clone(),
                        page_type: node.r#type.clone(),
                        score,
                        reason,
                    });
            }
        }
    }

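    // Signal 3: content similarity. Search on the input's title and summary;
    // scores are normalized against the top hit and weighted by 0.7.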
    let query = format!("{} {}", input_doc.title, input_doc.summary);
    if !query.trim().is_empty() {
        let results = search::search(
            &query,
            &search::SearchOptions {
                no_excerpt: true,
                top_k: 10,
                ..Default::default()
            },
            &searcher,
            &wiki_name,
            is,
        )?;
        let max_score = results
            .results
            .first()
            .map(|r| r.score)
            .unwrap_or(1.0)
            .max(0.001);
        for r in &results.results {
            if r.slug == slug.as_str() || existing_links.contains(&r.slug) {
                continue;
            }
            let score = r.score / max_score * 0.7;
            let reason = "similar content".to_string();
            candidates
                .entry(r.slug.clone())
                .and_modify(|c| {
                    if score > c.score {
                        c.score = score;
                        c.reason = reason.clone();
                    }
                })
                .or_insert_with(|| {
                    let doc = find_doc_by_slug(&searcher, is, &r.slug).unwrap_or_default();
                    CandidateScore {
                        slug: r.slug.clone(),
                        title: r.title.clone(),
                        page_type: doc.page_type,
                        score,
                        reason,
                    }
                });
        }
    }

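    // Signal 4: community clustering. Pages in the same detected community score
    // a flat 0.4, but only when no other signal already proposed them.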
    if let Some(community_map) = get_cached_community_map(
        &space.index_schema,
        &space.type_registry,
        &space.index_manager,
        &space.graph_cache,
        &space.community_cache,
        &searcher,
        resolved.graph.min_nodes_for_communities,
    )? && let Some(&my_community) = community_map.get(slug.as_str())
    {
        let mut peers: Vec<&str> = community_map
            .iter()
            .filter(|&(s, &community)| {
                community == my_community
                    && s.as_str() != slug.as_str()
                    && !existing_links.contains(s.as_str())
                    && !candidates.contains_key(s.as_str())
            })
            .map(|(s, _)| s.as_str())
            .collect();
        peers.sort_unstable();
        for node_slug in peers
            .into_iter()
            .take(resolved.graph.community_suggestions_limit)
        {
            let doc = find_doc_by_slug(&searcher, is, node_slug)?;
            candidates.insert(
                node_slug.to_string(),
                CandidateScore {
                    slug: node_slug.to_string(),
                    title: doc.title,
                    page_type: doc.page_type,
                    score: 0.4,
                    reason: "same knowledge cluster".to_string(),
                },
            );
        }
    }

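    // Rank by score, drop anything below the configured minimum, and cap at the limit.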
    let mut ranked: Vec<CandidateScore> = candidates.into_values().collect();
    ranked.sort_by(|a, b| b.score.total_cmp(&a.score));
    ranked.retain(|c| c.score >= min_score);
    ranked.truncate(limit);

    let suggestions = ranked
        .into_iter()
        .map(|c| {
            let field = suggest_field(&input_type, &c.page_type, &space.type_registry);
            Suggestion {
                uri: format!("wiki://{wiki_name}/{}", c.slug),
                slug: c.slug,
                title: c.title,
                r#type: c.page_type,
                score: (c.score * 100.0).round() / 100.0,
                reason: c.reason,
                field,
            }
        })
        .collect();

    Ok(suggestions)
}

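/// Page metadata pulled from the search index.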
#[derive(Default)]
struct DocInfo {
    title: String,
    summary: String,
    page_type: String,
    tags: Vec<String>,
    links: Vec<String>,
}

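/// A candidate accumulated across signals; when signals overlap, the highest
/// score and its reason win.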
struct CandidateScore {
    slug: String,
    title: String,
    page_type: String,
    score: f32,
    reason: String,
}

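/// Looks up a page by slug and extracts its title, summary, type, tags, and
/// outgoing links. Returns an empty `DocInfo` when the slug is not indexed.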
fn find_doc_by_slug(
    searcher: &tantivy::Searcher,
    is: &crate::index_schema::IndexSchema,
    slug: &str,
) -> Result<DocInfo> {
    let f_slug = is.field("slug");
    let f_title = is.field("title");
    let f_type = is.field("type");

    let query = tantivy::query::TermQuery::new(
        tantivy::Term::from_field_text(f_slug, slug),
        tantivy::schema::IndexRecordOption::Basic,
    );
    // `TopDocs` returns results ordered by score by default.
    let results = searcher.search(&query, &tantivy::collector::TopDocs::with_limit(1))?;

    if let Some((_score, addr)) = results.first() {
        let doc: tantivy::TantivyDocument = searcher.doc(*addr)?;
        let title = doc
            .get_first(f_title)
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let page_type = doc
            .get_first(f_type)
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let summary = is
            .try_field("summary")
            .and_then(|f| doc.get_first(f))
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let tags: Vec<String> = is
            .try_field("tags")
            .map(|f| {
                doc.get_all(f)
                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
                    .collect()
            })
            .unwrap_or_default();

        let mut links = Vec::new();
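        // Gather outgoing link slugs from every link-bearing field.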
        for field_name in &["sources", "concepts", "body_links", "document_refs"] {
            if let Some(f) = is.try_field(field_name) {
                for val in doc.get_all(f) {
                    if let Some(s) = val.as_str() {
                        links.push(s.to_string());
                    }
                }
            }
        }

        Ok(DocInfo {
            title,
            summary,
            page_type,
            tags,
            links,
        })
    } else {
        Ok(DocInfo::default())
    }
}

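/// Picks the frontmatter field a link to a page of `candidate_type` belongs
/// in, based on the type registry's edges for `page_type`; source-like types
/// are treated as interchangeable. Falls back to an inline wikilink.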
fn suggest_field(
    page_type: &str,
    candidate_type: &str,
    registry: &crate::type_registry::SpaceTypeRegistry,
) -> String {
    let source_types = [
        "paper",
        "article",
        "documentation",
        "clipping",
        "transcript",
        "note",
        "data",
        "book-chapter",
        "thread",
    ];
    let is_source = |t: &str| source_types.contains(&t);

    for edge in registry.edges(page_type) {
        let targets = &edge.target_types;
        if targets.iter().any(|t| t == candidate_type) {
            return edge.field.clone();
        }
        if is_source(candidate_type) && targets.iter().any(|t| is_source(t)) {
            return edge.field.clone();
        }
    }

    "[[wikilink]]".to_string()
}