1use std::collections::{HashMap, HashSet, VecDeque};
2
3use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
4use nucleo_matcher::{Config, Matcher, Utf32Str};
5
6use crate::graph::{Edge, GraphFile, Node, Note};
7use crate::index::Bm25Index;
8
// Okapi BM25 parameters: k1 controls term-frequency saturation, b controls
// document-length normalization (standard textbook defaults).
const BM25_K1: f64 = 1.5;
const BM25_B: f64 = 0.75;
// Character budget for adaptive rendering; caller-supplied targets are
// clamped into [MIN_TARGET_CHARS, MAX_TARGET_CHARS] by clamp_target_chars.
const DEFAULT_TARGET_CHARS: usize = 1400;
const MIN_TARGET_CHARS: usize = 300;
const MAX_TARGET_CHARS: usize = 12_000;
// Fuzzy-mode caps and per-field match weights. Consumed by score_node,
// which is outside this view — NOTE(review): confirm exact application there.
const FUZZY_NEIGHBOR_CONTEXT_CAP: u32 = 220;
const FUZZY_NO_PRIMARY_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_DESCRIPTION_WEIGHT: u32 = 2;
const FUZZY_FACT_WEIGHT: u32 = 2;
const FUZZY_NOTE_BODY_WEIGHT: u32 = 1;
const FUZZY_NOTE_TAG_WEIGHT: u32 = 2;
// BM25-mode lexical boosts and per-field term weights. Consumed by
// score_bm25_raw, outside this view — NOTE(review): confirm usage there.
const BM25_PHRASE_MATCH_BOOST: i64 = 120;
const BM25_TOKEN_MATCH_BOOST: i64 = 45;
const BM25_ID_WEIGHT: usize = 4;
const BM25_NAME_WEIGHT: usize = 3;
const BM25_ALIAS_WEIGHT: usize = 2;
const BM25_DESCRIPTION_WEIGHT: usize = 2;
const BM25_FACT_WEIGHT: usize = 2;
const BM25_NOTE_BODY_WEIGHT: usize = 1;
const BM25_NOTE_TAG_WEIGHT: usize = 1;
const BM25_NEIGHBOR_WEIGHT: usize = 1;
// Score-composition knobs (see compose_scores): importance contribution and
// the relative/absolute caps applied to the combined feedback + importance
// "authority" term.
const IMPORTANCE_NEUTRAL: i64 = 4;
const IMPORTANCE_STEP_BOOST: i64 = 22;
const SCORE_META_MAX_RATIO: f64 = 0.35;
const SCORE_META_MIN_CAP: i64 = 30;
const SCORE_META_MAX_CAP: i64 = 240;
35
/// Search strategy used when matching nodes against a query.
#[derive(Debug, Clone, Copy)]
pub enum FindMode {
    /// Subsequence fuzzy matching via `nucleo_matcher`.
    Fuzzy,
    /// Okapi BM25 lexical ranking.
    Bm25,
}
41
/// A matched node paired with its final composite score and the
/// per-component breakdown used for score-debug output.
#[derive(Clone, Copy)]
struct ScoredNode<'a> {
    // Final composite score used for ranking (descending).
    score: i64,
    node: &'a Node,
    breakdown: ScoreBreakdown,
}
48
/// Per-component breakdown of a composite score (built by compose_scores).
#[derive(Debug, Clone, Copy)]
struct ScoreBreakdown {
    // Raw matcher relevance (fuzzy score or BM25), before normalization.
    raw_relevance: f64,
    // Relevance rescaled onto a 0..=1000 log scale relative to the best match.
    normalized_relevance: i64,
    // Flat boost from exact phrase/token hits (BM25 mode).
    lexical_boost: i64,
    feedback_boost: i64,
    importance_boost: i64,
    // feedback + importance before capping, after capping, and the cap used.
    authority_raw: i64,
    authority_applied: i64,
    authority_cap: i64,
}
60
/// Pre-composition candidate: the matcher's raw relevance plus any lexical
/// boost, before normalization and authority boosts are applied.
struct RawCandidate<'a> {
    node: &'a Node,
    raw_relevance: f64,
    // Extra flat boost from exact phrase/token hits; fuzzy candidates carry 0.
    lexical_boost: i64,
}
66
/// Precomputed lookup tables for one query run: notes grouped by owning node,
/// and a sorted, deduplicated undirected neighbor list per node.
struct FindQueryContext<'a> {
    notes_by_node: HashMap<&'a str, Vec<&'a Note>>,
    neighbors_by_node: HashMap<&'a str, Vec<&'a Node>>,
}
71
72impl<'a> FindQueryContext<'a> {
73 fn build(graph: &'a GraphFile) -> Self {
74 let node_by_id: HashMap<&'a str, &'a Node> = graph
75 .nodes
76 .iter()
77 .map(|node| (node.id.as_str(), node))
78 .collect();
79
80 let mut notes_by_node: HashMap<&'a str, Vec<&'a Note>> = HashMap::new();
81 for note in &graph.notes {
82 notes_by_node
83 .entry(note.node_id.as_str())
84 .or_default()
85 .push(note);
86 }
87
88 let mut neighbors_by_node: HashMap<&'a str, Vec<&'a Node>> = HashMap::new();
89 for edge in &graph.edges {
90 if let (Some(source), Some(target)) = (
91 node_by_id.get(edge.source_id.as_str()),
92 node_by_id.get(edge.target_id.as_str()),
93 ) {
94 neighbors_by_node
95 .entry(source.id.as_str())
96 .or_default()
97 .push(*target);
98 neighbors_by_node
99 .entry(target.id.as_str())
100 .or_default()
101 .push(*source);
102 }
103 }
104
105 for neighbors in neighbors_by_node.values_mut() {
106 neighbors.sort_by(|left, right| left.id.cmp(&right.id));
107 neighbors.dedup_by(|left, right| left.id == right.id);
108 }
109
110 Self {
111 notes_by_node,
112 neighbors_by_node,
113 }
114 }
115
116 fn notes_for(&self, node_id: &str) -> &[&'a Note] {
117 self.notes_by_node
118 .get(node_id)
119 .map(Vec::as_slice)
120 .unwrap_or(&[])
121 }
122
123 fn neighbors_for(&self, node_id: &str) -> &[&'a Node] {
124 self.neighbors_by_node
125 .get(node_id)
126 .map(Vec::as_slice)
127 .unwrap_or(&[])
128 }
129}
130
/// Owned, public mirror of the internal per-component score breakdown,
/// exposed through [`ScoredNodeResult`].
#[derive(Debug, Clone)]
pub struct ScoreBreakdownResult {
    pub raw_relevance: f64,
    pub normalized_relevance: i64,
    pub lexical_boost: i64,
    pub feedback_boost: i64,
    pub importance_boost: i64,
    pub authority_raw: i64,
    pub authority_applied: i64,
    pub authority_cap: i64,
}
142
/// Owned, public mirror of a scored match: the final score, a clone of the
/// matched node, and the score breakdown.
#[derive(Debug, Clone)]
pub struct ScoredNodeResult {
    pub score: i64,
    pub node: Node,
    pub breakdown: ScoreBreakdownResult,
}
149
/// Render find results for every query, one section per query, without a
/// prebuilt BM25 index and with score-debug output disabled.
///
/// Thin convenience wrapper around [`render_find_with_index`].
pub fn render_find(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    mode: FindMode,
    full: bool,
) -> String {
    render_find_with_index(
        graph,
        queries,
        limit,
        include_features,
        mode,
        full,
        false, // debug_score: no per-node breakdown lines
        None,  // no prebuilt BM25 index
    )
}
169
170pub fn render_find_with_index(
171 graph: &GraphFile,
172 queries: &[String],
173 limit: usize,
174 include_features: bool,
175 mode: FindMode,
176 full: bool,
177 debug_score: bool,
178 index: Option<&Bm25Index>,
179) -> String {
180 let mut sections = Vec::new();
181 for query in queries {
182 let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
183 let total = matches.len();
184 let visible: Vec<_> = matches.into_iter().take(limit).collect();
185 let shown = visible.len();
186 let mut lines = vec![render_result_header(query, shown, total)];
187 for scored in visible {
188 lines.push(render_scored_node_block(graph, &scored, full, debug_score));
189 }
190 push_limit_omission_line(&mut lines, shown, total);
191 sections.push(lines.join("\n"));
192 }
193 format!("{}\n", sections.join("\n\n"))
194}
195
196pub fn find_nodes(
197 graph: &GraphFile,
198 query: &str,
199 limit: usize,
200 include_features: bool,
201 mode: FindMode,
202) -> Vec<Node> {
203 find_matches_with_index(graph, query, limit, include_features, mode, None)
204 .into_iter()
205 .map(|item| item.node.clone())
206 .collect()
207}
208
209pub fn find_nodes_with_index(
210 graph: &GraphFile,
211 query: &str,
212 limit: usize,
213 include_features: bool,
214 mode: FindMode,
215 index: Option<&Bm25Index>,
216) -> Vec<Node> {
217 find_matches_with_index(graph, query, limit, include_features, mode, index)
218 .into_iter()
219 .map(|item| item.node.clone())
220 .collect()
221}
222
223pub fn find_nodes_and_total_with_index(
224 graph: &GraphFile,
225 query: &str,
226 limit: usize,
227 include_features: bool,
228 mode: FindMode,
229 index: Option<&Bm25Index>,
230) -> (usize, Vec<Node>) {
231 let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
232 let total = matches.len();
233 let nodes = matches
234 .into_iter()
235 .take(limit)
236 .map(|item| item.node.clone())
237 .collect();
238 (total, nodes)
239}
240
241pub fn find_scored_nodes_and_total_with_index(
242 graph: &GraphFile,
243 query: &str,
244 limit: usize,
245 include_features: bool,
246 mode: FindMode,
247 index: Option<&Bm25Index>,
248) -> (usize, Vec<ScoredNodeResult>) {
249 let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
250 let total = matches.len();
251 let nodes = matches
252 .into_iter()
253 .take(limit)
254 .map(|item| ScoredNodeResult {
255 score: item.score,
256 node: item.node.clone(),
257 breakdown: ScoreBreakdownResult {
258 raw_relevance: item.breakdown.raw_relevance,
259 normalized_relevance: item.breakdown.normalized_relevance,
260 lexical_boost: item.breakdown.lexical_boost,
261 feedback_boost: item.breakdown.feedback_boost,
262 importance_boost: item.breakdown.importance_boost,
263 authority_raw: item.breakdown.authority_raw,
264 authority_applied: item.breakdown.authority_applied,
265 authority_cap: item.breakdown.authority_cap,
266 },
267 })
268 .collect();
269 (total, nodes)
270}
271
/// Total number of matches across all `queries`, without a prebuilt index.
///
/// `limit` is accepted for call-site symmetry but does not cap the count —
/// the delegate receives it as `_limit` and ignores it.
pub fn count_find_results(
    graph: &GraphFile,
    queries: &[String],
    limit: usize,
    include_features: bool,
    mode: FindMode,
) -> usize {
    count_find_results_with_index(graph, queries, limit, include_features, mode, None)
}
281
282pub fn count_find_results_with_index(
283 graph: &GraphFile,
284 queries: &[String],
285 _limit: usize,
286 include_features: bool,
287 mode: FindMode,
288 index: Option<&Bm25Index>,
289) -> usize {
290 let mut total = 0;
291 for query in queries {
292 total += find_all_matches_with_index(graph, query, include_features, mode, index).len();
293 }
294 total
295}
296
/// Render a single node block followed by a trailing newline. `full`
/// enables the extended sections (metadata, notes, feedback, all facts and
/// edges); see render_node_block for the layout.
pub fn render_node(graph: &GraphFile, node: &Node, full: bool) -> String {
    format!("{}\n", render_node_block(graph, node, full))
}
300
301pub fn render_node_adaptive(graph: &GraphFile, node: &Node, target_chars: Option<usize>) -> String {
302 let target = clamp_target_chars(target_chars);
303 let full = format!("{}\n", render_node_block(graph, node, true));
304 if fits_target_chars(&full, target) {
305 return full;
306 }
307 let mut candidates = Vec::new();
308 for (depth, detail, edge_cap) in [
309 (0usize, DetailLevel::Rich, 8usize),
310 (1usize, DetailLevel::Rich, 8usize),
311 (2usize, DetailLevel::Rich, 6usize),
312 (2usize, DetailLevel::Compact, 6usize),
313 (2usize, DetailLevel::Minimal, 2usize),
314 ] {
315 let rendered = render_single_node_candidate(graph, node, depth, detail, edge_cap);
316 candidates.push(Candidate {
317 rendered,
318 depth,
319 detail,
320 shown_nodes: 1 + depth,
321 });
322 }
323 pick_best_candidate(candidates, target)
324}
325
326pub fn render_find_adaptive_with_index(
327 graph: &GraphFile,
328 queries: &[String],
329 limit: usize,
330 include_features: bool,
331 mode: FindMode,
332 target_chars: Option<usize>,
333 debug_score: bool,
334 index: Option<&Bm25Index>,
335) -> String {
336 let target = clamp_target_chars(target_chars);
337 let mut sections = Vec::new();
338 for query in queries {
339 let matches = find_all_matches_with_index(graph, query, include_features, mode, index);
340 let total = matches.len();
341 let visible: Vec<_> = matches.into_iter().take(limit).collect();
342 let section = if visible.len() == 1 {
343 render_single_result_section(graph, query, &visible[0], total, target, debug_score)
344 } else {
345 render_multi_result_section(graph, query, &visible, total, target, debug_score)
346 };
347 sections.push(section);
348 }
349 format!("{}\n", sections.join("\n\n"))
350}
351
/// How much per-node detail adaptive rendering includes.
#[derive(Clone, Copy)]
enum DetailLevel {
    Rich,
    Compact,
    Minimal,
}
358
/// One rendering attempt considered by pick_best_candidate.
struct Candidate {
    rendered: String,
    // Neighbor-expansion depth used for this rendering.
    depth: usize,
    detail: DetailLevel,
    // Number of result nodes included.
    shown_nodes: usize,
}
365
impl DetailLevel {
    /// Tie-break bonus used by pick_best_candidate to prefer richer detail
    /// when two candidates fit equally well.
    fn utility_bonus(self) -> usize {
        match self {
            DetailLevel::Rich => 20,
            DetailLevel::Compact => 10,
            DetailLevel::Minimal => 0,
        }
    }
}
375
376fn clamp_target_chars(target_chars: Option<usize>) -> usize {
377 target_chars
378 .unwrap_or(DEFAULT_TARGET_CHARS)
379 .clamp(MIN_TARGET_CHARS, MAX_TARGET_CHARS)
380}
381
/// Adaptive rendering for a query that matched exactly one visible node.
///
/// Tries the full block first; if it exceeds `target` characters, renders a
/// ladder of progressively constrained candidates (more neighbor depth but
/// less detail / fewer edges) and picks the best fit.
fn render_single_result_section(
    graph: &GraphFile,
    query: &str,
    node: &ScoredNode<'_>,
    total_available: usize,
    target: usize,
    debug_score: bool,
) -> String {
    let header = render_result_header(query, 1, total_available);
    let full = render_single_result_candidate(
        graph,
        &header,
        node,
        total_available,
        0,
        DetailLevel::Rich,
        8,
        true, // full block; depth/detail/edge_cap are ignored in this mode
        debug_score,
    );
    if fits_target_chars(&full, target) {
        return full.trim_end().to_owned();
    }
    // Fallback ladder: (neighbor depth, detail level, edge cap).
    let mut candidates = Vec::new();
    for (depth, detail, edge_cap) in [
        (0usize, DetailLevel::Rich, 8usize),
        (1usize, DetailLevel::Rich, 8usize),
        (2usize, DetailLevel::Rich, 6usize),
        (2usize, DetailLevel::Compact, 6usize),
        (2usize, DetailLevel::Minimal, 2usize),
    ] {
        candidates.push(Candidate {
            rendered: render_single_result_candidate(
                graph,
                &header,
                node,
                total_available,
                depth,
                detail,
                edge_cap,
                false,
                debug_score,
            ),
            depth,
            detail,
            shown_nodes: 1 + depth,
        });
    }
    pick_best_candidate(candidates, target)
        .trim_end()
        .to_owned()
}
434
/// Adaptive rendering for a query with zero or multiple visible results.
///
/// Tries the full section first; if it exceeds `target`, generates candidate
/// renderings that trade off detail level, per-node edge cap, number of
/// results shown, and neighbor depth, then picks the best fit.
fn render_multi_result_section(
    graph: &GraphFile,
    query: &str,
    nodes: &[ScoredNode<'_>],
    total_available: usize,
    target: usize,
    debug_score: bool,
) -> String {
    let visible_total = nodes.len();
    let full = render_full_result_section(graph, query, nodes, total_available, debug_score);
    if fits_target_chars(&full, target) {
        return full;
    }
    let mut candidates = Vec::new();
    let full_cap = visible_total;
    let mid_cap = full_cap.min(5);
    let low_cap = full_cap.min(3);

    // Candidate ladder: (detail level, edge cap, result cap, neighbor depth).
    for (detail, edge_cap, result_cap, depth) in [
        (DetailLevel::Rich, 4usize, full_cap.min(4), 0usize),
        (DetailLevel::Compact, 3usize, full_cap, 0usize),
        (DetailLevel::Rich, 2usize, mid_cap, 1usize),
        (DetailLevel::Compact, 1usize, full_cap, 0usize),
        (DetailLevel::Minimal, 1usize, mid_cap, 0usize),
        (DetailLevel::Minimal, 0usize, low_cap, 0usize),
        (DetailLevel::Minimal, 0usize, low_cap.min(2), 1usize),
    ] {
        let shown = result_cap.min(nodes.len());
        let mut lines = vec![render_result_header(query, shown, total_available)];
        for node in nodes.iter().take(shown) {
            lines.extend(render_scored_node_candidate_lines(
                graph,
                node,
                0,
                detail,
                edge_cap,
                debug_score,
            ));
            if depth > 0 {
                lines.extend(render_neighbor_layers(graph, node.node, depth, detail));
            }
        }
        // Nodes cut by this candidate's result cap — distinct from the
        // limit-omission line below, which reports nodes cut by the caller.
        if visible_total > shown {
            lines.push(format!("... +{} more nodes omitted", visible_total - shown));
        }
        push_limit_omission_line(&mut lines, visible_total, total_available);
        candidates.push(Candidate {
            rendered: format!("{}\n", lines.join("\n")),
            depth,
            detail,
            shown_nodes: shown,
        });
    }

    pick_best_candidate(candidates, target)
        .trim_end()
        .to_owned()
}
493
494fn pick_best_candidate(candidates: Vec<Candidate>, target: usize) -> String {
495 let lower = (target as f64 * 0.7) as usize;
496 let mut best: Option<(usize, usize, usize, usize, String)> = None;
497
498 for candidate in candidates {
499 let chars = candidate.rendered.chars().count();
500 let overshoot = chars.saturating_sub(target);
501 let undershoot = lower.saturating_sub(chars);
502 let penalty = overshoot.saturating_mul(10).saturating_add(undershoot);
503 let utility = candidate
504 .depth
505 .saturating_mul(100)
506 .saturating_add(candidate.shown_nodes.saturating_mul(5))
507 .saturating_add(candidate.detail.utility_bonus());
508
509 let entry = (
510 penalty,
511 overshoot,
512 usize::MAX - utility,
513 usize::MAX - chars,
514 candidate.rendered,
515 );
516 if best.as_ref().is_none_or(|current| {
517 entry.0 < current.0
518 || (entry.0 == current.0 && entry.1 < current.1)
519 || (entry.0 == current.0 && entry.1 == current.1 && entry.2 < current.2)
520 || (entry.0 == current.0
521 && entry.1 == current.1
522 && entry.2 == current.2
523 && entry.3 < current.3)
524 }) {
525 best = Some(entry);
526 }
527 }
528
529 best.map(|item| item.4).unwrap_or_else(|| "\n".to_owned())
530}
531
532fn render_full_result_section(
533 graph: &GraphFile,
534 query: &str,
535 nodes: &[ScoredNode<'_>],
536 total_available: usize,
537 debug_score: bool,
538) -> String {
539 let mut lines = vec![render_result_header(query, nodes.len(), total_available)];
540 for node in nodes {
541 lines.push(render_scored_node_block(graph, node, true, debug_score));
542 }
543 push_limit_omission_line(&mut lines, nodes.len(), total_available);
544 lines.join("\n")
545}
546
547fn render_result_header(query: &str, shown: usize, total: usize) -> String {
548 let query = escape_cli_text(query);
549 if shown < total {
550 format!("? {query} ({shown}/{total})")
551 } else {
552 format!("? {query} ({total})")
553 }
554}
555
/// Append an "omitted by limit" trailer when fewer results are shown than
/// exist; no-op when everything is visible.
fn push_limit_omission_line(lines: &mut Vec<String>, shown: usize, total: usize) {
    if total > shown {
        let omitted = total - shown;
        lines.push(format!("... {omitted} more nodes omitted by limit"));
    }
}
562
/// True when `rendered` contains at most `target` characters. Checking for
/// the absence of a char at index `target` avoids counting the whole string.
fn fits_target_chars(rendered: &str, target: usize) -> bool {
    rendered.chars().nth(target).is_none()
}
566
567fn render_single_node_candidate(
568 graph: &GraphFile,
569 node: &Node,
570 depth: usize,
571 detail: DetailLevel,
572 edge_cap: usize,
573) -> String {
574 let lines = render_single_node_candidate_lines(graph, node, depth, detail, edge_cap);
575 format!("{}\n", lines.join("\n"))
576}
577
/// Render the header plus one scored result as a newline-terminated string.
/// With `full` the complete node block is emitted (depth/detail/edge_cap are
/// ignored); otherwise a size-constrained candidate rendering is produced
/// for adaptive fitting.
fn render_single_result_candidate(
    graph: &GraphFile,
    header: &str,
    node: &ScoredNode<'_>,
    total_available: usize,
    depth: usize,
    detail: DetailLevel,
    edge_cap: usize,
    full: bool,
    debug_score: bool,
) -> String {
    let mut lines = vec![header.to_owned()];
    if full {
        lines.push(render_scored_node_block(graph, node, true, debug_score));
    } else {
        lines.extend(render_scored_node_candidate_lines(
            graph,
            node,
            depth,
            detail,
            edge_cap,
            debug_score,
        ));
    }
    // Exactly one result is shown in this section.
    push_limit_omission_line(&mut lines, 1, total_available);
    format!("{}\n", lines.join("\n"))
}
605
606fn render_single_node_candidate_lines(
607 graph: &GraphFile,
608 node: &Node,
609 depth: usize,
610 detail: DetailLevel,
611 edge_cap: usize,
612) -> Vec<String> {
613 let mut lines = render_node_lines_with_edges(graph, node, detail, edge_cap);
614 if depth > 0 {
615 lines.extend(render_neighbor_layers(graph, node, depth, detail));
616 }
617 lines
618}
619
620fn render_scored_node_candidate_lines(
621 graph: &GraphFile,
622 node: &ScoredNode<'_>,
623 depth: usize,
624 detail: DetailLevel,
625 edge_cap: usize,
626 debug_score: bool,
627) -> Vec<String> {
628 let mut lines = vec![format!("score: {}", node.score)];
629 if debug_score {
630 lines.push(render_score_debug_line(node));
631 }
632 lines.extend(render_single_node_candidate_lines(
633 graph, node.node, depth, detail, edge_cap,
634 ));
635 lines
636}
637
638fn render_scored_node_block(
639 graph: &GraphFile,
640 node: &ScoredNode<'_>,
641 full: bool,
642 debug_score: bool,
643) -> String {
644 if debug_score {
645 format!(
646 "score: {}\n{}\n{}",
647 node.score,
648 render_score_debug_line(node),
649 render_node_block(graph, node.node, full)
650 )
651 } else {
652 format!(
653 "score: {}\n{}",
654 node.score,
655 render_node_block(graph, node.node, full)
656 )
657 }
658}
659
/// One-line `score_debug:` dump of every ScoreBreakdown component, emitted
/// after the `score:` line when score-debug output is enabled.
fn render_score_debug_line(node: &ScoredNode<'_>) -> String {
    format!(
        "score_debug: raw_relevance={:.3} normalized_relevance={} lexical_boost={} feedback_boost={} importance_boost={} authority_raw={} authority_applied={} authority_cap={}",
        node.breakdown.raw_relevance,
        node.breakdown.normalized_relevance,
        node.breakdown.lexical_boost,
        node.breakdown.feedback_boost,
        node.breakdown.importance_boost,
        node.breakdown.authority_raw,
        node.breakdown.authority_applied,
        node.breakdown.authority_cap,
    )
}
673
/// BFS outward from `root` up to `max_depth`, then render each non-empty
/// depth layer as a "depth N: shown/total neighbors" header followed by
/// capped identity lines (the cap depends on `detail`).
fn render_neighbor_layers(
    graph: &GraphFile,
    root: &Node,
    max_depth: usize,
    detail: DetailLevel,
) -> Vec<String> {
    let mut out = Vec::new();
    // `seen` assigns each node to exactly one (shallowest) layer.
    let mut seen: HashSet<String> = HashSet::from([root.id.clone()]);
    let mut queue: VecDeque<(String, usize)> = VecDeque::from([(root.id.clone(), 0usize)]);
    let mut layers: Vec<Vec<&Node>> = vec![Vec::new(); max_depth + 1];

    while let Some((node_id, depth)) = queue.pop_front() {
        if depth >= max_depth {
            continue;
        }
        // incident_edges is importance-sorted, so each layer lists the most
        // important neighbors first.
        for incident in incident_edges(graph, &node_id) {
            if seen.insert(incident.related.id.clone()) {
                let next_depth = depth + 1;
                if next_depth <= max_depth {
                    layers[next_depth].push(incident.related);
                    queue.push_back((incident.related.id.clone(), next_depth));
                }
            }
        }
    }

    for depth in 1..=max_depth {
        if layers[depth].is_empty() {
            continue;
        }
        // Richer detail levels show more neighbors per layer.
        let cap = match detail {
            DetailLevel::Rich => 6,
            DetailLevel::Compact => 4,
            DetailLevel::Minimal => 3,
        };
        let shown = layers[depth].len().min(cap);
        out.push(format!(
            "depth {depth}: {shown}/{} neighbors",
            layers[depth].len()
        ));
        for node in layers[depth].iter().take(shown) {
            out.extend(render_node_identity_lines(node, detail));
        }
        if layers[depth].len() > shown {
            out.push(format!(
                "... +{} more neighbors omitted",
                layers[depth].len() - shown
            ));
        }
    }

    out
}
727
728fn render_node_lines_with_edges(
729 graph: &GraphFile,
730 node: &Node,
731 detail: DetailLevel,
732 edge_cap: usize,
733) -> Vec<String> {
734 let mut lines = render_node_identity_lines(node, detail);
735 lines.extend(render_node_link_lines(graph, node, edge_cap));
736 lines
737}
738
739fn render_node_identity_lines(node: &Node, detail: DetailLevel) -> Vec<String> {
740 let mut lines = Vec::new();
741 match detail {
742 DetailLevel::Rich => {
743 lines.push(format!(
744 "# {} | {} [{}]",
745 node.id,
746 escape_cli_text(&node.name),
747 node.r#type
748 ));
749 if !node.properties.alias.is_empty() {
750 lines.push(format!(
751 "aka: {}",
752 node.properties
753 .alias
754 .iter()
755 .map(|alias| escape_cli_text(alias))
756 .collect::<Vec<_>>()
757 .join(", ")
758 ));
759 }
760 push_description_line(&mut lines, &node.properties.description, None);
761 let shown_facts = node.properties.key_facts.len().min(3);
762 for fact in node.properties.key_facts.iter().take(shown_facts) {
763 lines.push(format!("- {}", escape_cli_text(fact)));
764 }
765 let omitted = node.properties.key_facts.len().saturating_sub(shown_facts);
766 if omitted > 0 {
767 lines.push(format!("... {omitted} more facts omitted"));
768 }
769 }
770 DetailLevel::Compact => {
771 lines.push(format!(
772 "# {} | {} [{}]",
773 node.id,
774 escape_cli_text(&node.name),
775 node.r#type
776 ));
777 push_description_line(&mut lines, &node.properties.description, Some(140));
778 if let Some(fact) = node.properties.key_facts.first() {
779 lines.push(format!("- {}", escape_cli_text(fact)));
780 }
781 }
782 DetailLevel::Minimal => {
783 lines.push(format!(
784 "# {} | {} [{}]",
785 node.id,
786 escape_cli_text(&node.name),
787 node.r#type
788 ));
789 }
790 }
791 lines
792}
793
794fn render_node_link_lines(graph: &GraphFile, node: &Node, edge_cap: usize) -> Vec<String> {
795 let incident = incident_edges(graph, &node.id);
796 if incident.is_empty() {
797 return Vec::new();
798 }
799
800 let mut lines = Vec::new();
801 if incident.len() > 12 {
802 lines.push(format!("links: {} total", incident.len()));
803 let (out_summary, in_summary) = summarize_relations(&incident);
804 if !out_summary.is_empty() {
805 lines.push(format!("out: {out_summary}"));
806 }
807 if !in_summary.is_empty() {
808 lines.push(format!("in: {in_summary}"));
809 }
810 }
811
812 let shown = incident.len().min(edge_cap);
813 for edge in incident.into_iter().take(shown) {
814 let prefix = if edge.incoming { "<-" } else { "->" };
815 lines.extend(render_edge_lines(prefix, edge.edge, edge.related, false));
816 }
817 if edge_cap > 0 && incident_count(graph, &node.id) > shown {
818 lines.push(format!(
819 "... {} more links omitted",
820 incident_count(graph, &node.id) - shown
821 ));
822 }
823 lines
824}
825
826fn incident_count(graph: &GraphFile, node_id: &str) -> usize {
827 graph
828 .edges
829 .iter()
830 .filter(|edge| edge.source_id == node_id || edge.target_id == node_id)
831 .count()
832}
833
/// An edge touching a given node, paired with the node on its other end.
struct IncidentEdge<'a> {
    edge: &'a Edge,
    related: &'a Node,
    // True when the inspected node is the edge's target.
    incoming: bool,
}
839
840fn incident_edges<'a>(graph: &'a GraphFile, node_id: &str) -> Vec<IncidentEdge<'a>> {
841 let mut edges = Vec::new();
842 for edge in &graph.edges {
843 if edge.source_id == node_id {
844 if let Some(related) = graph.node_by_id(&edge.target_id) {
845 edges.push(IncidentEdge {
846 edge,
847 related,
848 incoming: false,
849 });
850 }
851 } else if edge.target_id == node_id {
852 if let Some(related) = graph.node_by_id(&edge.source_id) {
853 edges.push(IncidentEdge {
854 edge,
855 related,
856 incoming: true,
857 });
858 }
859 }
860 }
861 edges.sort_by(|left, right| {
862 right
863 .related
864 .properties
865 .importance
866 .cmp(&left.related.properties.importance)
867 .then_with(|| left.edge.relation.cmp(&right.edge.relation))
868 .then_with(|| left.related.id.cmp(&right.related.id))
869 });
870 edges
871}
872
873fn summarize_relations(edges: &[IncidentEdge<'_>]) -> (String, String) {
874 let mut out: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
875 let mut incoming: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
876
877 for edge in edges {
878 let bucket = if edge.incoming {
879 &mut incoming
880 } else {
881 &mut out
882 };
883 *bucket.entry(edge.edge.relation.clone()).or_insert(0) += 1;
884 }
885
886 (join_relation_counts(&out), join_relation_counts(&incoming))
887}
888
/// Format up to the first three relation counts as "rel xN, rel xN, ..."
/// (BTreeMap iteration gives alphabetical order).
fn join_relation_counts(counts: &std::collections::BTreeMap<String, usize>) -> String {
    let mut parts = Vec::with_capacity(counts.len().min(3));
    for (relation, count) in counts.iter().take(3) {
        parts.push(format!("{relation} x{count}"));
    }
    parts.join(", ")
}
897
/// Render the canonical text block for one node.
///
/// Layout: identity header, aliases, description, (full only) metadata,
/// key facts, (full only) sources/feedback/notes, then outgoing and
/// incoming edges. Compact mode (`full == false`) caps the description at
/// 200 chars, facts at 2, and edges at 3 per direction.
fn render_node_block(graph: &GraphFile, node: &Node, full: bool) -> String {
    let mut lines = Vec::new();
    // Identity header: "# id | name [type]".
    lines.push(format!(
        "# {} | {} [{}]",
        node.id,
        escape_cli_text(&node.name),
        node.r#type
    ));

    if !node.properties.alias.is_empty() {
        lines.push(format!(
            "aka: {}",
            node.properties
                .alias
                .iter()
                .map(|alias| escape_cli_text(alias))
                .collect::<Vec<_>>()
                .join(", ")
        ));
    }
    push_description_line(
        &mut lines,
        &node.properties.description,
        if full { None } else { Some(200) },
    );
    // Extended metadata fields only appear in full mode.
    if full {
        if !node.properties.domain_area.is_empty() {
            lines.push(format!(
                "domain_area: {}",
                escape_cli_text(&node.properties.domain_area)
            ));
        }
        if !node.properties.provenance.is_empty() {
            lines.push(format!(
                "provenance: {}",
                escape_cli_text(&node.properties.provenance)
            ));
        }
        if let Some(confidence) = node.properties.confidence {
            lines.push(format!("confidence: {confidence}"));
        }
        lines.push(format!("importance: {}", node.properties.importance));
        if !node.properties.created_at.is_empty() {
            lines.push(format!("created_at: {}", node.properties.created_at));
        }
    }

    // Key facts: all in full mode, at most 2 otherwise.
    let facts_to_show = if full {
        node.properties.key_facts.len()
    } else {
        node.properties.key_facts.len().min(2)
    };
    for fact in node.properties.key_facts.iter().take(facts_to_show) {
        lines.push(format!("- {}", escape_cli_text(fact)));
    }
    let omitted = node
        .properties
        .key_facts
        .len()
        .saturating_sub(facts_to_show);
    if omitted > 0 {
        lines.push(format!("... {omitted} more facts omitted"));
    }

    if full {
        if !node.source_files.is_empty() {
            lines.push(format!(
                "sources: {}",
                node.source_files
                    .iter()
                    .map(|source| escape_cli_text(source))
                    .collect::<Vec<_>>()
                    .join(", ")
            ));
        }
        push_feedback_lines(
            &mut lines,
            node.properties.feedback_score,
            node.properties.feedback_count,
            node.properties.feedback_last_ts_ms,
            None, // no key prefix for node-level feedback lines
        );
    }

    // Attached notes are rendered in full mode only.
    let attached_notes: Vec<_> = graph
        .notes
        .iter()
        .filter(|note| note.node_id == node.id)
        .collect();
    if full && !attached_notes.is_empty() {
        lines.push(format!("notes: {}", attached_notes.len()));
        for note in attached_notes {
            lines.extend(render_attached_note_lines(note));
        }
    }

    // Edges: outgoing first, then incoming; each capped at 3 unless `full`.
    // Edges whose other endpoint cannot be resolved are silently skipped.
    for edge in outgoing_edges(graph, &node.id, full) {
        if let Some(target) = graph.node_by_id(&edge.target_id) {
            lines.extend(render_edge_lines("->", edge, target, full));
        }
    }
    for edge in incoming_edges(graph, &node.id, full) {
        if let Some(source) = graph.node_by_id(&edge.source_id) {
            lines.extend(render_edge_lines("<-", edge, source, full));
        }
    }

    lines.join("\n")
}
1007
1008fn outgoing_edges<'a>(graph: &'a GraphFile, node_id: &str, full: bool) -> Vec<&'a Edge> {
1009 let mut edges: Vec<&Edge> = graph
1010 .edges
1011 .iter()
1012 .filter(|edge| edge.source_id == node_id)
1013 .collect();
1014 edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1015 if !full {
1016 edges.truncate(3);
1017 }
1018 edges
1019}
1020
1021fn incoming_edges<'a>(graph: &'a GraphFile, node_id: &str, full: bool) -> Vec<&'a Edge> {
1022 let mut edges: Vec<&Edge> = graph
1023 .edges
1024 .iter()
1025 .filter(|edge| edge.target_id == node_id)
1026 .collect();
1027 edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1028 if !full {
1029 edges.truncate(3);
1030 }
1031 edges
1032}
1033
/// Render one edge line (plus metadata lines when `full`).
///
/// A relation starting with "NOT_" renders as a negated arrow ("->!"/"<-!")
/// with the prefix stripped. Line format:
/// `{arrow} {relation} | {related.id} | {related.name}[ | detail]`.
fn render_edge_lines(prefix: &str, edge: &Edge, related: &Node, full: bool) -> Vec<String> {
    let (arrow, relation) = if edge.relation.starts_with("NOT_") {
        (
            format!("{prefix}!"),
            // Note: trim_start_matches strips repeated "NOT_" prefixes too.
            edge.relation.trim_start_matches("NOT_"),
        )
    } else {
        (prefix.to_owned(), edge.relation.as_str())
    };

    let mut line = format!(
        "{arrow} {relation} | {} | {}",
        related.id,
        escape_cli_text(&related.name)
    );
    if !edge.properties.detail.is_empty() {
        line.push_str(" | ");
        let detail = escape_cli_text(&edge.properties.detail);
        if full {
            line.push_str(&detail);
        } else {
            // Compact views cap the detail text at 80 characters.
            line.push_str(&truncate(&detail, 80));
        }
    }
    let mut lines = vec![line];
    // Full mode appends edge feedback and validity-window lines.
    if full {
        push_feedback_lines(
            &mut lines,
            edge.properties.feedback_score,
            edge.properties.feedback_count,
            edge.properties.feedback_last_ts_ms,
            Some("edge_"),
        );
        if !edge.properties.valid_from.is_empty() {
            lines.push(format!("edge_valid_from: {}", edge.properties.valid_from));
        }
        if !edge.properties.valid_to.is_empty() {
            lines.push(format!("edge_valid_to: {}", edge.properties.valid_to));
        }
    }
    lines
}
1076
/// Truncate `value` to at most `max_len` characters, appending "..." when
/// anything was cut.
///
/// Counts `char`s, not bytes, so multi-byte UTF-8 text is cut safely. When
/// `max_len <= 3` there is no room for an ellipsis, so the text is hard-cut
/// instead. (Previously `take(max_len.saturating_sub(3))` saturated to 0 for
/// `max_len` in 0..=2 and returned a bare "..." — exceeding the budget.)
fn truncate(value: &str, max_len: usize) -> String {
    let char_count = value.chars().count();
    if char_count <= max_len {
        return value.to_owned();
    }
    if max_len <= 3 {
        return value.chars().take(max_len).collect();
    }
    let truncated: String = value.chars().take(max_len - 3).collect();
    format!("{truncated}...")
}
1085
/// Escape backslashes and control whitespace (\n, \r, \t) so a value always
/// renders on a single CLI line.
fn escape_cli_text(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}
1099
1100fn push_description_line(lines: &mut Vec<String>, description: &str, max_len: Option<usize>) {
1101 if description.is_empty() {
1102 return;
1103 }
1104 let escaped = escape_cli_text(description);
1105 let rendered = match max_len {
1106 Some(limit) => truncate(&escaped, limit),
1107 None => escaped,
1108 };
1109 lines.push(format!("desc: {rendered}"));
1110}
1111
/// Append feedback lines (score/count/last timestamp), each only when it
/// carries information. `prefix` (e.g. "edge_") namespaces the keys.
fn push_feedback_lines(
    lines: &mut Vec<String>,
    score: f64,
    count: u64,
    last_ts_ms: Option<u64>,
    prefix: Option<&str>,
) {
    let prefix = prefix.unwrap_or_default();
    if score != 0.0 {
        lines.push(format!("{prefix}feedback_score: {score}"));
    }
    if count > 0 {
        lines.push(format!("{prefix}feedback_count: {count}"));
    }
    if let Some(ts) = last_ts_ms {
        lines.push(format!("{prefix}feedback_last_ts_ms: {ts}"));
    }
}
1130
/// Render the detail lines for one attached note: a "! id" header followed
/// by each non-empty field. (The `&note.` references below reconstruct
/// mojibake — "¬e." — present in the extracted source.)
fn render_attached_note_lines(note: &crate::graph::Note) -> Vec<String> {
    let mut lines = vec![format!("! {}", note.id)];
    if !note.body.is_empty() {
        lines.push(format!("note_body: {}", escape_cli_text(&note.body)));
    }
    if !note.tags.is_empty() {
        lines.push(format!(
            "note_tags: {}",
            note.tags
                .iter()
                .map(|tag| escape_cli_text(tag))
                .collect::<Vec<_>>()
                .join(", ")
        ));
    }
    if !note.author.is_empty() {
        lines.push(format!("note_author: {}", escape_cli_text(&note.author)));
    }
    if !note.created_at.is_empty() {
        lines.push(format!("note_created_at: {}", note.created_at));
    }
    if !note.provenance.is_empty() {
        lines.push(format!(
            "note_provenance: {}",
            escape_cli_text(&note.provenance)
        ));
    }
    if !note.source_files.is_empty() {
        lines.push(format!(
            "note_sources: {}",
            note.source_files
                .iter()
                .map(|source| escape_cli_text(source))
                .collect::<Vec<_>>()
                .join(", ")
        ));
    }
    lines
}
1170
1171fn find_matches_with_index<'a>(
1172 graph: &'a GraphFile,
1173 query: &str,
1174 limit: usize,
1175 include_features: bool,
1176 mode: FindMode,
1177 index: Option<&Bm25Index>,
1178) -> Vec<ScoredNode<'a>> {
1179 let mut matches = find_all_matches_with_index(graph, query, include_features, mode, index);
1180 matches.truncate(limit);
1181 matches
1182}
1183
1184fn find_all_matches_with_index<'a>(
1185 graph: &'a GraphFile,
1186 query: &str,
1187 include_features: bool,
1188 mode: FindMode,
1189 index: Option<&Bm25Index>,
1190) -> Vec<ScoredNode<'a>> {
1191 let context = FindQueryContext::build(graph);
1192 let mut scored: Vec<ScoredNode<'a>> = match mode {
1193 FindMode::Fuzzy => {
1194 let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
1195 let mut matcher = Matcher::new(Config::DEFAULT);
1196 let candidates = graph
1197 .nodes
1198 .iter()
1199 .filter(|node| include_features || node.r#type != "Feature")
1200 .filter_map(|node| {
1201 score_node(&context, node, query, &pattern, &mut matcher).map(|score| {
1202 RawCandidate {
1203 node,
1204 raw_relevance: score as f64,
1205 lexical_boost: 0,
1206 }
1207 })
1208 })
1209 .collect();
1210 compose_scores(candidates)
1211 }
1212 FindMode::Bm25 => compose_scores(score_bm25_raw(
1213 graph,
1214 &context,
1215 query,
1216 include_features,
1217 index,
1218 )),
1219 };
1220
1221 scored.sort_by(|left, right| {
1222 right
1223 .score
1224 .cmp(&left.score)
1225 .then_with(|| left.node.id.cmp(&right.node.id))
1226 });
1227 scored
1228}
1229
1230fn compose_scores<'a>(candidates: Vec<RawCandidate<'a>>) -> Vec<ScoredNode<'a>> {
1231 let max_raw = candidates
1232 .iter()
1233 .map(|candidate| candidate.raw_relevance)
1234 .fold(0.0f64, f64::max);
1235 let max_raw_log = max_raw.ln_1p();
1236
1237 candidates
1238 .into_iter()
1239 .filter_map(|candidate| {
1240 if candidate.raw_relevance <= 0.0 {
1241 return None;
1242 }
1243 let normalized_relevance = if max_raw_log > 0.0 {
1244 ((candidate.raw_relevance.ln_1p() / max_raw_log) * 1000.0).round() as i64
1245 } else {
1246 0
1247 };
1248 let feedback = feedback_boost(candidate.node);
1249 let importance = importance_boost(candidate.node);
1250 let authority_raw = feedback + importance;
1251 let relative_cap =
1252 ((normalized_relevance as f64) * SCORE_META_MAX_RATIO).round() as i64;
1253 let authority_cap = relative_cap.max(SCORE_META_MIN_CAP).min(SCORE_META_MAX_CAP);
1254 let authority_applied = authority_raw.clamp(-authority_cap, authority_cap);
1255 let final_score = normalized_relevance + authority_applied;
1256
1257 Some(ScoredNode {
1258 score: final_score,
1259 node: candidate.node,
1260 breakdown: ScoreBreakdown {
1261 raw_relevance: candidate.raw_relevance,
1262 normalized_relevance,
1263 lexical_boost: candidate.lexical_boost,
1264 feedback_boost: feedback,
1265 importance_boost: importance,
1266 authority_raw,
1267 authority_applied,
1268 authority_cap,
1269 },
1270 })
1271 })
1272 .collect()
1273}
1274
1275fn feedback_boost(node: &Node) -> i64 {
1276 let count = node.properties.feedback_count as f64;
1277 if count <= 0.0 {
1278 return 0;
1279 }
1280 let avg = node.properties.feedback_score / count;
1281 let confidence = (count.ln_1p() / 3.0).min(1.0);
1282 let scaled = avg * 200.0 * confidence;
1283 scaled.clamp(-300.0, 300.0).round() as i64
1284}
1285
1286fn importance_boost(node: &Node) -> i64 {
1287 (i64::from(node.properties.importance) - IMPORTANCE_NEUTRAL) * IMPORTANCE_STEP_BOOST
1288}
1289
1290fn score_bm25_raw<'a>(
1291 graph: &'a GraphFile,
1292 context: &FindQueryContext<'a>,
1293 query: &str,
1294 include_features: bool,
1295 index: Option<&Bm25Index>,
1296) -> Vec<RawCandidate<'a>> {
1297 let terms = tokenize(query);
1298 if terms.is_empty() {
1299 return Vec::new();
1300 }
1301
1302 if let Some(idx) = index {
1303 let results = idx.search(&terms, graph);
1304 return results
1305 .into_iter()
1306 .filter_map(|(node_id, score)| {
1307 let node = graph.node_by_id(&node_id)?;
1308 if !include_features && node.r#type == "Feature" {
1309 return None;
1310 }
1311 let document_terms = node_document_terms(context, node);
1312 let lexical_boost = bm25_lexical_boost(&terms, &document_terms);
1313 Some(RawCandidate {
1314 node,
1315 raw_relevance: score as f64 * 100.0 + lexical_boost as f64,
1316 lexical_boost,
1317 })
1318 })
1319 .collect();
1320 }
1321
1322 let mut docs: Vec<(&'a Node, Vec<String>)> = graph
1323 .nodes
1324 .iter()
1325 .filter(|node| include_features || node.r#type != "Feature")
1326 .map(|node| (node, node_document_terms(context, node)))
1327 .collect();
1328
1329 if docs.is_empty() {
1330 return Vec::new();
1331 }
1332
1333 let mut df: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
1334 for term in &terms {
1335 let mut count = 0usize;
1336 for (_, tokens) in &docs {
1337 if tokens.iter().any(|t| t == term) {
1338 count += 1;
1339 }
1340 }
1341 df.insert(term.as_str(), count);
1342 }
1343
1344 let total_docs = docs.len() as f64;
1345 let avgdl = docs
1346 .iter()
1347 .map(|(_, tokens)| tokens.len() as f64)
1348 .sum::<f64>()
1349 / total_docs;
1350
1351 let mut scored = Vec::new();
1352
1353 for (node, tokens) in docs.drain(..) {
1354 let dl = tokens.len() as f64;
1355 if dl == 0.0 {
1356 continue;
1357 }
1358 let mut score = 0.0f64;
1359 for term in &terms {
1360 let tf = tokens.iter().filter(|t| *t == term).count() as f64;
1361 if tf == 0.0 {
1362 continue;
1363 }
1364 let df_t = *df.get(term.as_str()).unwrap_or(&0) as f64;
1365 let idf = (1.0 + (total_docs - df_t + 0.5) / (df_t + 0.5)).ln();
1366 let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl));
1367 score += idf * (tf * (BM25_K1 + 1.0) / denom);
1368 }
1369 if score > 0.0 {
1370 let lexical_boost = bm25_lexical_boost(&terms, &tokens);
1371 scored.push(RawCandidate {
1372 node,
1373 raw_relevance: score * 100.0 + lexical_boost as f64,
1374 lexical_boost,
1375 });
1376 }
1377 }
1378
1379 scored
1380}
1381
1382fn node_document_terms(context: &FindQueryContext<'_>, node: &Node) -> Vec<String> {
1383 let mut tokens = Vec::new();
1384 push_terms(&mut tokens, &node.id, BM25_ID_WEIGHT);
1385 push_terms(&mut tokens, &node.name, BM25_NAME_WEIGHT);
1386 push_terms(
1387 &mut tokens,
1388 &node.properties.description,
1389 BM25_DESCRIPTION_WEIGHT,
1390 );
1391 for alias in &node.properties.alias {
1392 push_terms(&mut tokens, alias, BM25_ALIAS_WEIGHT);
1393 }
1394 for fact in &node.properties.key_facts {
1395 push_terms(&mut tokens, fact, BM25_FACT_WEIGHT);
1396 }
1397 for note in context.notes_for(&node.id) {
1398 push_terms(&mut tokens, ¬e.body, BM25_NOTE_BODY_WEIGHT);
1399 for tag in ¬e.tags {
1400 push_terms(&mut tokens, tag, BM25_NOTE_TAG_WEIGHT);
1401 }
1402 }
1403 for neighbor in context.neighbors_for(&node.id) {
1404 push_terms(&mut tokens, &neighbor.id, BM25_NEIGHBOR_WEIGHT);
1405 push_terms(&mut tokens, &neighbor.name, BM25_NEIGHBOR_WEIGHT);
1406 push_terms(
1407 &mut tokens,
1408 &neighbor.properties.description,
1409 BM25_NEIGHBOR_WEIGHT,
1410 );
1411 for alias in &neighbor.properties.alias {
1412 push_terms(&mut tokens, alias, BM25_NEIGHBOR_WEIGHT);
1413 }
1414 }
1415 tokens
1416}
1417
1418fn push_terms(target: &mut Vec<String>, value: &str, weight: usize) {
1419 if value.is_empty() {
1420 return;
1421 }
1422 let terms = tokenize(value);
1423 for _ in 0..weight {
1424 target.extend(terms.iter().cloned());
1425 }
1426}
1427
/// Splits text into lowercase alphanumeric tokens.
///
/// Lowercasing is done per character (not per word), so any non-alphanumeric
/// character terminates the current token.
fn tokenize(text: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut word = String::new();
    for ch in text.chars() {
        if ch.is_alphanumeric() {
            // A single char may lowercase to multiple chars (full Unicode).
            word.extend(ch.to_lowercase());
        } else if !word.is_empty() {
            out.push(std::mem::take(&mut word));
        }
    }
    if !word.is_empty() {
        out.push(word);
    }
    out
}
1445
1446fn bm25_lexical_boost(query_terms: &[String], document_terms: &[String]) -> i64 {
1447 if query_terms.is_empty() || document_terms.is_empty() {
1448 return 0;
1449 }
1450 if query_terms.len() > 1 && contains_token_phrase(document_terms, query_terms) {
1451 return BM25_PHRASE_MATCH_BOOST;
1452 }
1453 let document_vocab: HashSet<&str> = document_terms.iter().map(String::as_str).collect();
1454 let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
1455 let matched_tokens = query_vocab
1456 .iter()
1457 .filter(|token| document_vocab.contains(**token))
1458 .count() as i64;
1459 if matched_tokens == 0 {
1460 return 0;
1461 }
1462 let query_token_count = query_vocab.len() as i64;
1463 (matched_tokens * BM25_TOKEN_MATCH_BOOST + query_token_count - 1) / query_token_count
1464}
1465
/// True when `query_terms` occurs as a contiguous subsequence of
/// `document_terms`. Empty or over-long needles never match.
fn contains_token_phrase(document_terms: &[String], query_terms: &[String]) -> bool {
    let needle_len = query_terms.len();
    if needle_len == 0 || needle_len > document_terms.len() {
        return false;
    }
    document_terms
        .windows(needle_len)
        .any(|window| window == query_terms)
}
1474
1475fn score_node(
1476 context: &FindQueryContext<'_>,
1477 node: &Node,
1478 query: &str,
1479 pattern: &Pattern,
1480 matcher: &mut Matcher,
1481) -> Option<u32> {
1482 let mut primary_score = 0;
1483 let mut primary_hits = 0;
1484
1485 let id_score = score_primary_field(query, pattern, matcher, &node.id, 4);
1486 if id_score > 0 {
1487 primary_hits += 1;
1488 }
1489 primary_score += id_score;
1490
1491 let name_score = score_primary_field(query, pattern, matcher, &node.name, 3);
1492 if name_score > 0 {
1493 primary_hits += 1;
1494 }
1495 primary_score += name_score;
1496
1497 for alias in &node.properties.alias {
1498 let alias_score = score_primary_field(query, pattern, matcher, alias, 3);
1499 if alias_score > 0 {
1500 primary_hits += 1;
1501 }
1502 primary_score += alias_score;
1503 }
1504
1505 let mut contextual_score = score_secondary_field(
1506 query,
1507 pattern,
1508 matcher,
1509 &node.properties.description,
1510 FUZZY_DESCRIPTION_WEIGHT,
1511 );
1512 for fact in &node.properties.key_facts {
1513 contextual_score += score_secondary_field(query, pattern, matcher, fact, FUZZY_FACT_WEIGHT);
1514 }
1515 contextual_score += score_notes_context(context, node, query, pattern, matcher);
1516
1517 let neighbor_context = score_neighbor_context(context, node, query, pattern, matcher)
1518 .min(FUZZY_NEIGHBOR_CONTEXT_CAP);
1519 contextual_score += if primary_hits > 0 {
1520 neighbor_context / 2
1521 } else {
1522 neighbor_context
1523 };
1524
1525 if primary_hits == 0 {
1526 contextual_score /= FUZZY_NO_PRIMARY_CONTEXT_DIVISOR;
1527 }
1528
1529 let total = primary_score + contextual_score;
1530 (total > 0).then_some(total)
1531}
1532
1533fn score_notes_context(
1534 context: &FindQueryContext<'_>,
1535 node: &Node,
1536 query: &str,
1537 pattern: &Pattern,
1538 matcher: &mut Matcher,
1539) -> u32 {
1540 let mut total = 0;
1541 for note in context.notes_for(&node.id) {
1542 total += score_secondary_field(query, pattern, matcher, ¬e.body, FUZZY_NOTE_BODY_WEIGHT);
1543 for tag in ¬e.tags {
1544 total += score_secondary_field(query, pattern, matcher, tag, FUZZY_NOTE_TAG_WEIGHT);
1545 }
1546 }
1547 total
1548}
1549
1550fn score_neighbor_context(
1551 context: &FindQueryContext<'_>,
1552 node: &Node,
1553 query: &str,
1554 pattern: &Pattern,
1555 matcher: &mut Matcher,
1556) -> u32 {
1557 let mut best = 0;
1558
1559 for neighbor in context.neighbors_for(&node.id) {
1560 let mut score = score_secondary_field(query, pattern, matcher, &neighbor.id, 1)
1561 + score_secondary_field(query, pattern, matcher, &neighbor.name, 1)
1562 + score_secondary_field(query, pattern, matcher, &neighbor.properties.description, 1);
1563
1564 for alias in &neighbor.properties.alias {
1565 score += score_secondary_field(query, pattern, matcher, alias, 1);
1566 }
1567
1568 best = best.max(score);
1569 }
1570
1571 best
1572}
1573
1574fn score_field(pattern: &Pattern, matcher: &mut Matcher, value: &str) -> Option<u32> {
1575 if value.is_empty() {
1576 return None;
1577 }
1578 let mut buf = Vec::new();
1579 let haystack = Utf32Str::new(value, &mut buf);
1580 pattern.score(haystack, matcher)
1581}
1582
1583fn score_primary_field(
1584 query: &str,
1585 pattern: &Pattern,
1586 matcher: &mut Matcher,
1587 value: &str,
1588 weight: u32,
1589) -> u32 {
1590 let bonus = textual_bonus(query, value);
1591 let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
1592 if bonus == 0 && fuzzy == 0 {
1593 return 0;
1594 }
1595 (fuzzy + bonus) * weight
1596}
1597
1598fn score_secondary_field(
1599 query: &str,
1600 pattern: &Pattern,
1601 matcher: &mut Matcher,
1602 value: &str,
1603 weight: u32,
1604) -> u32 {
1605 let bonus = textual_bonus(query, value);
1606 let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
1607 if bonus == 0 && fuzzy == 0 {
1608 return 0;
1609 }
1610 (fuzzy + bonus / 2) * weight
1611}
1612
1613fn textual_bonus(query: &str, value: &str) -> u32 {
1614 let query = query.trim().to_lowercase();
1615 let value = value.to_lowercase();
1616
1617 if value == query {
1618 return 400;
1619 }
1620 if value.contains(&query) {
1621 return 200;
1622 }
1623
1624 query
1625 .split_whitespace()
1626 .map(|token| {
1627 if value.contains(token) {
1628 80
1629 } else if is_subsequence(token, &value) {
1630 40
1631 } else {
1632 0
1633 }
1634 })
1635 .sum()
1636}
1637
/// True when every char of `needle` appears in `haystack` in order (not
/// necessarily contiguously). An empty needle never matches.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    if needle.is_empty() {
        return false;
    }

    let mut remaining = needle.chars().peekable();
    for ch in haystack.chars() {
        if remaining.peek() == Some(&ch) {
            remaining.next();
            if remaining.peek().is_none() {
                // Every needle char has been consumed.
                return true;
            }
        }
    }

    false
}
1660
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Concept` node with the given identity, textual fields, and
    /// authority inputs (importance + feedback) for scoring tests.
    fn make_node(
        id: &str,
        name: &str,
        description: &str,
        key_facts: &[&str],
        alias: &[&str],
        importance: u8,
        feedback_score: f64,
        feedback_count: u64,
    ) -> Node {
        let mut properties = crate::graph::NodeProperties::default();
        properties.description = description.to_owned();
        properties.key_facts = key_facts.iter().map(|v| (*v).to_owned()).collect();
        properties.alias = alias.iter().map(|v| (*v).to_owned()).collect();
        properties.importance = importance;
        properties.feedback_score = feedback_score;
        properties.feedback_count = feedback_count;
        Node {
            id: id.to_owned(),
            r#type: "Concept".to_owned(),
            name: name.to_owned(),
            properties,
            source_files: Vec::new(),
        }
    }

    /// Returns the final score for node `id` in `results`; panics if absent.
    fn score_for(results: &[ScoredNode<'_>], id: &str) -> i64 {
        results
            .iter()
            .find(|item| item.node.id == id)
            .map(|item| item.score)
            .expect("score for node")
    }

    #[test]
    fn textual_bonus_tiers_are_stable() {
        // Tiers: 400 exact, 200 substring, 80 per contained token
        // (80 + 80 = 160 below), 40 for subsequence-only, 0 otherwise.
        assert_eq!(textual_bonus("abc", "abc"), 400);
        assert_eq!(textual_bonus("abc", "xxabcxx"), 200);
        assert_eq!(textual_bonus("abc def", "aa abc and def zz"), 160);
        assert_eq!(textual_bonus("abc", "aXbYc"), 40);
        assert_eq!(textual_bonus("abc", "zzz"), 0);
    }

    #[test]
    fn tokenize_handles_unicode_casefolding() {
        // Non-ASCII letters are lowercased; '-' splits tokens.
        let tokens = tokenize("ŁÓDŹ smart-home");
        assert_eq!(tokens, vec!["łódź", "smart", "home"]);
    }

    #[test]
    fn bm25_lexical_boost_prefers_phrase_then_tokens() {
        let query_terms = tokenize("smart home api");
        // Contiguous phrase -> BM25_PHRASE_MATCH_BOOST.
        assert_eq!(
            bm25_lexical_boost(&query_terms, &tokenize("x smart home api y")),
            120
        );
        // All tokens present but scattered -> full token boost (ceil of 3*45/3).
        assert_eq!(
            bm25_lexical_boost(&query_terms, &tokenize("smart x api y home")),
            45
        );
        assert_eq!(
            bm25_lexical_boost(&query_terms, &tokenize("nothing here")),
            0
        );
    }

    #[test]
    fn score_node_uses_key_facts_and_notes_without_primary_match() {
        // Query matches only key facts and an attached note, never id/name/alias.
        let node = make_node(
            "concept:gateway",
            "Gateway",
            "",
            &["Autentykacja OAuth2 przez konto producenta"],
            &[],
            4,
            0.0,
            0,
        );
        let mut graph = GraphFile::new("test");
        graph.nodes.push(node.clone());
        graph.notes.push(crate::graph::Note {
            id: "note:oauth".to_owned(),
            node_id: node.id.clone(),
            body: "Token refresh przez OAuth2".to_owned(),
            tags: vec!["oauth2".to_owned()],
            ..Default::default()
        });

        let pattern = Pattern::parse(
            "oauth2 producenta",
            CaseMatching::Ignore,
            Normalization::Smart,
        );
        let context = FindQueryContext::build(&graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let score = score_node(&context, &node, "oauth2 producenta", &pattern, &mut matcher);
        assert!(score.is_some_and(|value| value > 0));

        // Same query against a node with no facts/notes must not score at all.
        let empty_graph = GraphFile::new("empty");
        let empty_node = make_node("concept:gateway", "Gateway", "", &[], &[], 4, 0.0, 0);
        let empty_context = FindQueryContext::build(&empty_graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let empty_score = score_node(
            &empty_context,
            &empty_node,
            "oauth2 producenta",
            &pattern,
            &mut matcher,
        );
        assert!(empty_score.is_none());
    }

    #[test]
    fn score_bm25_respects_importance_boost_for_equal_documents() {
        // Identical documents; only the importance property differs (6 vs 1),
        // so importance_boost must decide the ordering.
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:high",
            "High",
            "smart home api",
            &[],
            &[],
            6,
            0.0,
            0,
        ));
        graph.nodes.push(make_node(
            "concept:low",
            "Low",
            "smart home api",
            &[],
            &[],
            1,
            0.0,
            0,
        ));

        let results =
            find_all_matches_with_index(&graph, "smart home api", true, FindMode::Bm25, None);
        let high_score = score_for(&results, "concept:high");
        let low_score = score_for(&results, "concept:low");
        assert!(high_score > low_score);
    }

    #[test]
    fn final_score_caps_authority_boost_for_weak_relevance() {
        // "weak" has a huge feedback score but low relevance; compose_scores
        // must clamp its authority boost at the relevance-derived cap.
        let weak = make_node(
            "concept:weak",
            "Weak",
            "smart home api",
            &[],
            &[],
            6,
            300.0,
            1,
        );
        let strong = make_node(
            "concept:strong",
            "Strong",
            "smart home api smart home api smart home api smart home api",
            &[],
            &[],
            4,
            0.0,
            0,
        );
        let candidates = vec![
            RawCandidate {
                node: &weak,
                raw_relevance: 12.0,
                lexical_boost: 0,
            },
            RawCandidate {
                node: &strong,
                raw_relevance: 100.0,
                lexical_boost: 0,
            },
        ];
        let scored = compose_scores(candidates);
        let weak_scored = scored
            .iter()
            .find(|item| item.node.id == "concept:weak")
            .expect("weak node");
        assert_eq!(
            weak_scored.breakdown.authority_applied,
            weak_scored.breakdown.authority_cap
        );
        assert!(weak_scored.breakdown.authority_raw > weak_scored.breakdown.authority_cap);
    }

    #[test]
    fn importance_and_feedback_boost_have_expected_ranges() {
        // importance: (level - 4) * 22 -> 6 gives 44, 1 gives -66.
        let high_importance = make_node("concept:high", "High", "", &[], &[], 6, 0.0, 0);
        let low_importance = make_node("concept:low", "Low", "", &[], &[], 1, 0.0, 0);
        assert_eq!(importance_boost(&high_importance), 44);
        assert_eq!(importance_boost(&low_importance), -66);

        // feedback: avg * 200 * min(ln_1p(count)/3, 1), clamped to ±300;
        // one vote gives confidence ln(2)/3 ≈ 0.231 -> 46 / -92 / saturated 300.
        let positive = make_node("concept:pos", "Pos", "", &[], &[], 4, 1.0, 1);
        let negative = make_node("concept:neg", "Neg", "", &[], &[], 4, -2.0, 1);
        let saturated = make_node("concept:sat", "Sat", "", &[], &[], 4, 300.0, 1);
        assert_eq!(feedback_boost(&positive), 46);
        assert_eq!(feedback_boost(&negative), -92);
        assert_eq!(feedback_boost(&saturated), 300);
    }
}