1use std::collections::{HashMap, HashSet, VecDeque};
2
3use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
4use nucleo_matcher::{Config, Matcher, Utf32Str};
5
6use crate::graph::{Edge, GraphFile, Node, Note};
7use crate::index::Bm25Index;
8use crate::text_norm;
9
// BM25 tuning parameters. NOTE(review): presumably the usual Okapi `k1` / `b`
// knobs; the scoring code that reads them is outside this section — confirm there.
const BM25_K1: f64 = 1.5;
const BM25_B: f64 = 0.75;

// Character budget for adaptive rendering: the default used when the caller
// supplies none, and the bounds enforced by `clamp_target_chars`.
const DEFAULT_TARGET_CHARS: usize = 4200;
const MIN_TARGET_CHARS: usize = 300;
const MAX_TARGET_CHARS: usize = 12_000;

// Fuzzy-scoring weights, caps and divisors. NOTE(review): consumed by the fuzzy
// scorer, which is not visible here; exact semantics to be confirmed there.
const FUZZY_NEIGHBOR_CONTEXT_CAP: u32 = 220;
const FUZZY_NO_PRIMARY_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_NEIGHBOR_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_DESCRIPTION_WEIGHT: u32 = 2;
const FUZZY_FACT_WEIGHT: u32 = 2;
const FUZZY_NOTE_BODY_WEIGHT: u32 = 1;
const FUZZY_NOTE_TAG_WEIGHT: u32 = 2;

// Additive lexical boosts for BM25 results (phrase / proximity / token matches).
// NOTE(review): applied outside this section — confirm how they stack.
const BM25_PHRASE_MATCH_BOOST: i64 = 120;
const BM25_PROXIMITY_MATCH_BOOST: i64 = 80;
const BM25_TOKEN_MATCH_BOOST: i64 = 45;

// Per-field BM25 weights. NOTE(review): presumably term-repetition factors when
// building a node's document; verify against the indexing code.
const BM25_ID_WEIGHT: usize = 5;
const BM25_NAME_WEIGHT: usize = 4;
const BM25_ALIAS_WEIGHT: usize = 4;
const BM25_DESCRIPTION_WEIGHT: usize = 2;
const BM25_FACT_WEIGHT: usize = 2;
const BM25_NOTE_BODY_WEIGHT: usize = 1;
const BM25_NOTE_TAG_WEIGHT: usize = 1;
const BM25_NEIGHBOR_WEIGHT: usize = 1;

// Relative weight of a node's own text versus its neighbors' text, and the token
// window used for proximity matching. NOTE(review): readers are outside this section.
const BM25_SELF_CONTEXT_WEIGHT: f64 = 3.0;
const BM25_NEIGHBOR_CONTEXT_WEIGHT: f64 = 1.0;
const BM25_PROXIMITY_WINDOW_TOKENS: usize = 6;

// Fact-volume normalisation and importance / metadata boost limits.
// NOTE(review): names suggest score-shaping caps; confirm in the scoring code.
const FACT_VOLUME_BASE_CHARS: f64 = 500.0;
const FACT_VOLUME_MIN_FACTOR: f64 = 0.35;
const IMPORTANCE_NEUTRAL: f64 = 0.5;
const IMPORTANCE_MAX_ABS_BOOST: f64 = 66.0;
const SCORE_META_MAX_RATIO: f64 = 0.35;
const SCORE_META_MIN_CAP: i64 = 30;
const SCORE_META_MAX_CAP: i64 = 240;
43
/// Retrieval strategy requested by callers of the `find_*` / `render_find*` entry points.
///
/// In this part of the file the value is only passed through to the matching
/// routines; the code that interprets each variant lives elsewhere.
#[derive(Debug, Clone, Copy)]
pub enum FindMode {
    /// Fuzzy matching. NOTE(review): presumably backed by `nucleo_matcher` — confirm in the scorer.
    Fuzzy,
    /// BM25 lexical ranking.
    Bm25,
    /// Combined ranking. NOTE(review): presumably blended using `FindTune` weights — confirm.
    Hybrid,
}
50
/// Per-signal weights supplied by the caller to tune ranking.
///
/// Values produced by [`FindTune::parse`] are always within `0.0..=1.0`.
#[derive(Debug, Clone, Copy)]
pub struct FindTune {
    /// Weight of the BM25 signal.
    pub bm25: f64,
    /// Weight of the fuzzy-match signal.
    pub fuzzy: f64,
    /// Weight of the vector signal.
    pub vector: f64,
}

impl FindTune {
    /// Parses a comma-separated `key=value` list such as `"bm25=0.6,fuzzy=0.3"`.
    ///
    /// Recognised keys are `bm25`, `fuzzy` and `vector`; keys that are not
    /// mentioned keep their [`Default`] value. Whitespace around keys and
    /// values is ignored and every value is clamped to `0.0..=1.0`.
    ///
    /// Returns `None` when a segment has no `=`, a key is unknown, a value is
    /// not a number, or a value is NaN.
    pub fn parse(raw: &str) -> Option<Self> {
        let mut tune = Self::default();
        for part in raw.split(',') {
            let (key, value) = part.split_once('=')?;
            let value = value.trim().parse::<f64>().ok()?;
            // `"nan".parse::<f64>()` succeeds and `f64::clamp` returns NaN for a
            // NaN input, so without this check a NaN weight would slip past the
            // clamp and poison every score it is multiplied into.
            if value.is_nan() {
                return None;
            }
            match key.trim() {
                "bm25" => tune.bm25 = value,
                "fuzzy" => tune.fuzzy = value,
                "vector" => tune.vector = value,
                _ => return None,
            }
        }
        Some(tune.clamped())
    }

    /// Returns a copy with every weight limited to `0.0..=1.0`.
    fn clamped(self) -> Self {
        Self {
            bm25: self.bm25.clamp(0.0, 1.0),
            fuzzy: self.fuzzy.clamp(0.0, 1.0),
            vector: self.vector.clamp(0.0, 1.0),
        }
    }
}

impl Default for FindTune {
    /// Default blend: BM25-dominant with a small vector contribution.
    fn default() -> Self {
        Self {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 0.10,
        }
    }
}
92
/// A ranked match: a borrowed node together with its score.
#[derive(Clone, Copy)]
struct ScoredNode<'a> {
    /// Ranking score; rendered as `score: N` ahead of the node block.
    score: i64,
    /// The matched node.
    node: &'a Node,
    /// Per-component values printed by the `score_debug:` line.
    /// NOTE(review): how they combine into `score` is decided outside this section.
    breakdown: ScoreBreakdown,
}
99
/// Diagnostic components of a score, emitted verbatim by `render_score_debug_line`
/// and copied into the public `ScoreBreakdownResult`.
///
/// NOTE(review): the fields are populated by scoring code outside this section;
/// the descriptions below follow the field names only.
#[derive(Debug, Clone, Copy)]
struct ScoreBreakdown {
    /// Relevance before normalisation (printed with three decimals).
    raw_relevance: f64,
    /// Relevance after normalisation to an integer scale.
    normalized_relevance: i64,
    /// Lexical (phrase / proximity / token) boost.
    lexical_boost: i64,
    /// Boost derived from feedback.
    feedback_boost: i64,
    /// Boost derived from node importance.
    importance_boost: i64,
    /// Authority contribution before capping.
    authority_raw: i64,
    /// Authority contribution actually applied.
    authority_applied: i64,
    /// Cap that limited the authority contribution.
    authority_cap: i64,
}
111
/// A match candidate carrying an un-normalised relevance value and lexical boost.
///
/// NOTE(review): constructed and consumed outside this section — presumably the
/// intermediate form before a `ScoredNode` is produced; confirm there.
struct RawCandidate<'a> {
    /// Candidate node.
    node: &'a Node,
    /// Relevance before normalisation.
    raw_relevance: f64,
    /// Lexical boost for this candidate.
    lexical_boost: i64,
}
117
118struct FindQueryContext<'a> {
119 notes_by_node: HashMap<&'a str, Vec<&'a Note>>,
120 neighbors_by_node: HashMap<&'a str, Vec<&'a Node>>,
121}
122
123impl<'a> FindQueryContext<'a> {
124 fn build(graph: &'a GraphFile) -> Self {
125 let node_by_id: HashMap<&'a str, &'a Node> = graph
126 .nodes
127 .iter()
128 .map(|node| (node.id.as_str(), node))
129 .collect();
130
131 let mut notes_by_node: HashMap<&'a str, Vec<&'a Note>> = HashMap::new();
132 for note in &graph.notes {
133 notes_by_node
134 .entry(note.node_id.as_str())
135 .or_default()
136 .push(note);
137 }
138
139 let mut neighbors_by_node: HashMap<&'a str, Vec<&'a Node>> = HashMap::new();
140 for edge in &graph.edges {
141 if let (Some(source), Some(target)) = (
142 node_by_id.get(edge.source_id.as_str()),
143 node_by_id.get(edge.target_id.as_str()),
144 ) {
145 neighbors_by_node
146 .entry(source.id.as_str())
147 .or_default()
148 .push(*target);
149 neighbors_by_node
150 .entry(target.id.as_str())
151 .or_default()
152 .push(*source);
153 }
154 }
155
156 for neighbors in neighbors_by_node.values_mut() {
157 neighbors.sort_by(|left, right| left.id.cmp(&right.id));
158 neighbors.dedup_by(|left, right| left.id == right.id);
159 }
160
161 Self {
162 notes_by_node,
163 neighbors_by_node,
164 }
165 }
166
167 fn notes_for(&self, node_id: &str) -> &[&'a Note] {
168 self.notes_by_node
169 .get(node_id)
170 .map(Vec::as_slice)
171 .unwrap_or(&[])
172 }
173
174 fn neighbors_for(&self, node_id: &str) -> &[&'a Node] {
175 self.neighbors_by_node
176 .get(node_id)
177 .map(Vec::as_slice)
178 .unwrap_or(&[])
179 }
180}
181
/// Public, owned copy of the internal score breakdown.
///
/// `find_scored_nodes_and_total_with_index_tuned` fills it field-for-field from
/// the internal `ScoreBreakdown`; see that type for the meaning of each field.
#[derive(Debug, Clone)]
pub struct ScoreBreakdownResult {
    pub raw_relevance: f64,
    pub normalized_relevance: i64,
    pub lexical_boost: i64,
    pub feedback_boost: i64,
    pub importance_boost: i64,
    pub authority_raw: i64,
    pub authority_applied: i64,
    pub authority_cap: i64,
}
193
/// Owned search hit returned by the `find_scored_nodes_*` functions.
#[derive(Debug, Clone)]
pub struct ScoredNodeResult {
    /// Ranking score of the hit.
    pub score: i64,
    /// Clone of the matched node.
    pub node: Node,
    /// Diagnostic components of `score`.
    pub breakdown: ScoreBreakdownResult,
}
200
201pub fn render_find(
202 graph: &GraphFile,
203 queries: &[String],
204 limit: usize,
205 include_features: bool,
206 include_metadata: bool,
207 mode: FindMode,
208 full: bool,
209) -> String {
210 render_find_with_index(
211 graph,
212 queries,
213 limit,
214 include_features,
215 include_metadata,
216 mode,
217 full,
218 false,
219 None,
220 )
221}
222
223pub fn render_find_with_index(
224 graph: &GraphFile,
225 queries: &[String],
226 limit: usize,
227 include_features: bool,
228 include_metadata: bool,
229 mode: FindMode,
230 full: bool,
231 debug_score: bool,
232 index: Option<&Bm25Index>,
233) -> String {
234 render_find_with_index_tuned(
235 graph,
236 queries,
237 limit,
238 include_features,
239 include_metadata,
240 mode,
241 full,
242 debug_score,
243 index,
244 None,
245 )
246}
247
248pub fn render_find_with_index_tuned(
249 graph: &GraphFile,
250 queries: &[String],
251 limit: usize,
252 include_features: bool,
253 include_metadata: bool,
254 mode: FindMode,
255 full: bool,
256 debug_score: bool,
257 index: Option<&Bm25Index>,
258 tune: Option<&FindTune>,
259) -> String {
260 let mut sections = Vec::new();
261 for query in queries {
262 let matches = find_all_matches_with_index(
263 graph,
264 query,
265 include_features,
266 include_metadata,
267 mode,
268 index,
269 tune,
270 );
271 let total = matches.len();
272 let visible: Vec<_> = matches.into_iter().take(limit).collect();
273 let shown = visible.len();
274 let mut lines = vec![render_result_header(query, shown, total)];
275 for scored in visible {
276 lines.push(render_scored_node_block(
277 graph,
278 &scored,
279 full,
280 debug_score,
281 Some(query.as_str()),
282 ));
283 }
284 push_limit_omission_line(&mut lines, shown, total);
285 sections.push(lines.join("\n"));
286 }
287 format!("{}\n", sections.join("\n\n"))
288}
289
290pub fn find_nodes(
291 graph: &GraphFile,
292 query: &str,
293 limit: usize,
294 include_features: bool,
295 include_metadata: bool,
296 mode: FindMode,
297) -> Vec<Node> {
298 find_matches_with_index(
299 graph,
300 query,
301 limit,
302 include_features,
303 include_metadata,
304 mode,
305 None,
306 None,
307 )
308 .into_iter()
309 .map(|item| item.node.clone())
310 .collect()
311}
312
313pub fn find_nodes_with_index(
314 graph: &GraphFile,
315 query: &str,
316 limit: usize,
317 include_features: bool,
318 include_metadata: bool,
319 mode: FindMode,
320 index: Option<&Bm25Index>,
321) -> Vec<Node> {
322 find_matches_with_index(
323 graph,
324 query,
325 limit,
326 include_features,
327 include_metadata,
328 mode,
329 index,
330 None,
331 )
332 .into_iter()
333 .map(|item| item.node.clone())
334 .collect()
335}
336
337pub fn find_nodes_with_index_tuned(
338 graph: &GraphFile,
339 query: &str,
340 limit: usize,
341 include_features: bool,
342 include_metadata: bool,
343 mode: FindMode,
344 index: Option<&Bm25Index>,
345 tune: Option<&FindTune>,
346) -> Vec<Node> {
347 find_matches_with_index(
348 graph,
349 query,
350 limit,
351 include_features,
352 include_metadata,
353 mode,
354 index,
355 tune,
356 )
357 .into_iter()
358 .map(|item| item.node.clone())
359 .collect()
360}
361
362pub fn find_nodes_and_total_with_index(
363 graph: &GraphFile,
364 query: &str,
365 limit: usize,
366 include_features: bool,
367 include_metadata: bool,
368 mode: FindMode,
369 index: Option<&Bm25Index>,
370) -> (usize, Vec<Node>) {
371 let matches = find_all_matches_with_index(
372 graph,
373 query,
374 include_features,
375 include_metadata,
376 mode,
377 index,
378 None,
379 );
380 let total = matches.len();
381 let nodes = matches
382 .into_iter()
383 .take(limit)
384 .map(|item| item.node.clone())
385 .collect();
386 (total, nodes)
387}
388
389pub fn find_scored_nodes_and_total_with_index(
390 graph: &GraphFile,
391 query: &str,
392 limit: usize,
393 include_features: bool,
394 include_metadata: bool,
395 mode: FindMode,
396 index: Option<&Bm25Index>,
397) -> (usize, Vec<ScoredNodeResult>) {
398 find_scored_nodes_and_total_with_index_tuned(
399 graph,
400 query,
401 limit,
402 include_features,
403 include_metadata,
404 mode,
405 index,
406 None,
407 )
408}
409
410pub fn find_scored_nodes_and_total_with_index_tuned(
411 graph: &GraphFile,
412 query: &str,
413 limit: usize,
414 include_features: bool,
415 include_metadata: bool,
416 mode: FindMode,
417 index: Option<&Bm25Index>,
418 tune: Option<&FindTune>,
419) -> (usize, Vec<ScoredNodeResult>) {
420 let matches = find_all_matches_with_index(
421 graph,
422 query,
423 include_features,
424 include_metadata,
425 mode,
426 index,
427 tune,
428 );
429 let total = matches.len();
430 let nodes = matches
431 .into_iter()
432 .take(limit)
433 .map(|item| ScoredNodeResult {
434 score: item.score,
435 node: item.node.clone(),
436 breakdown: ScoreBreakdownResult {
437 raw_relevance: item.breakdown.raw_relevance,
438 normalized_relevance: item.breakdown.normalized_relevance,
439 lexical_boost: item.breakdown.lexical_boost,
440 feedback_boost: item.breakdown.feedback_boost,
441 importance_boost: item.breakdown.importance_boost,
442 authority_raw: item.breakdown.authority_raw,
443 authority_applied: item.breakdown.authority_applied,
444 authority_cap: item.breakdown.authority_cap,
445 },
446 })
447 .collect();
448 (total, nodes)
449}
450
451pub fn count_find_results(
452 graph: &GraphFile,
453 queries: &[String],
454 limit: usize,
455 include_features: bool,
456 include_metadata: bool,
457 mode: FindMode,
458) -> usize {
459 count_find_results_with_index(
460 graph,
461 queries,
462 limit,
463 include_features,
464 include_metadata,
465 mode,
466 None,
467 )
468}
469
470pub fn count_find_results_with_index(
471 graph: &GraphFile,
472 queries: &[String],
473 _limit: usize,
474 include_features: bool,
475 include_metadata: bool,
476 mode: FindMode,
477 index: Option<&Bm25Index>,
478) -> usize {
479 let mut total = 0;
480 for query in queries {
481 total += find_all_matches_with_index(
482 graph,
483 query,
484 include_features,
485 include_metadata,
486 mode,
487 index,
488 None,
489 )
490 .len();
491 }
492 total
493}
494
495pub fn render_node(graph: &GraphFile, node: &Node, full: bool) -> String {
496 format!("{}\n", render_node_block(graph, node, full))
497}
498
499pub fn render_node_adaptive(graph: &GraphFile, node: &Node, target_chars: Option<usize>) -> String {
500 let target = clamp_target_chars(target_chars);
501 let full = format!("{}\n", render_node_block(graph, node, true));
502 if fits_target_chars(&full, target) {
503 return full;
504 }
505 let mut candidates = Vec::new();
506 for (depth, detail, edge_cap) in [
507 (0usize, DetailLevel::Rich, 8usize),
508 (1usize, DetailLevel::Rich, 8usize),
509 (2usize, DetailLevel::Rich, 6usize),
510 (2usize, DetailLevel::Compact, 6usize),
511 (2usize, DetailLevel::Minimal, 2usize),
512 ] {
513 let rendered = render_single_node_candidate(graph, node, depth, detail, edge_cap);
514 candidates.push(Candidate {
515 rendered,
516 depth,
517 detail,
518 shown_nodes: 1 + depth,
519 });
520 }
521 pick_best_candidate(candidates, target)
522}
523
524pub fn render_find_adaptive_with_index(
525 graph: &GraphFile,
526 queries: &[String],
527 limit: usize,
528 include_features: bool,
529 include_metadata: bool,
530 mode: FindMode,
531 target_chars: Option<usize>,
532 debug_score: bool,
533 index: Option<&Bm25Index>,
534) -> String {
535 render_find_adaptive_with_index_tuned(
536 graph,
537 queries,
538 limit,
539 include_features,
540 include_metadata,
541 mode,
542 target_chars,
543 debug_score,
544 index,
545 None,
546 )
547}
548
549pub fn render_find_adaptive_with_index_tuned(
550 graph: &GraphFile,
551 queries: &[String],
552 limit: usize,
553 include_features: bool,
554 include_metadata: bool,
555 mode: FindMode,
556 target_chars: Option<usize>,
557 debug_score: bool,
558 index: Option<&Bm25Index>,
559 tune: Option<&FindTune>,
560) -> String {
561 let target = clamp_target_chars(target_chars);
562 let mut sections = Vec::new();
563 for query in queries {
564 let matches = find_all_matches_with_index(
565 graph,
566 query,
567 include_features,
568 include_metadata,
569 mode,
570 index,
571 tune,
572 );
573 let total = matches.len();
574 let visible: Vec<_> = matches.into_iter().take(limit).collect();
575 let section = if visible.len() == 1 {
576 render_single_result_section(graph, query, &visible[0], total, target, debug_score)
577 } else {
578 render_multi_result_section(graph, query, &visible, total, target, debug_score)
579 };
580 sections.push(section);
581 }
582 format!("{}\n", sections.join("\n\n"))
583}
584
/// How much of each node is printed in an adaptive rendering.
#[derive(Clone, Copy)]
enum DetailLevel {
    /// Header, aliases, full description and up to three facts.
    Rich,
    /// Header, shortened description and the first fact.
    Compact,
    /// Header line only.
    Minimal,
}

impl DetailLevel {
    /// Utility points this level contributes when candidates are compared;
    /// richer levels score higher.
    fn utility_bonus(self) -> usize {
        match self {
            Self::Minimal => 0,
            Self::Compact => 10,
            Self::Rich => 20,
        }
    }
}

/// One rendered alternative together with the settings that produced it.
struct Candidate {
    /// The rendered text.
    rendered: String,
    /// Neighbor depth included in the rendering.
    depth: usize,
    /// Detail level used for the nodes.
    detail: DetailLevel,
    /// Node count credited to this rendering when computing utility.
    shown_nodes: usize,
}
608
609fn clamp_target_chars(target_chars: Option<usize>) -> usize {
610 target_chars
611 .unwrap_or(DEFAULT_TARGET_CHARS)
612 .clamp(MIN_TARGET_CHARS, MAX_TARGET_CHARS)
613}
614
615fn render_single_result_section(
616 graph: &GraphFile,
617 query: &str,
618 node: &ScoredNode<'_>,
619 total_available: usize,
620 target: usize,
621 debug_score: bool,
622) -> String {
623 let header = render_result_header(query, 1, total_available);
624 let full = render_single_result_candidate(
625 graph,
626 query,
627 &header,
628 node,
629 total_available,
630 0,
631 DetailLevel::Rich,
632 8,
633 true,
634 debug_score,
635 );
636 if fits_target_chars(&full, target) {
637 return full.trim_end().to_owned();
638 }
639 let mut candidates = Vec::new();
640 for (depth, detail, edge_cap) in [
641 (0usize, DetailLevel::Rich, 8usize),
642 (1usize, DetailLevel::Rich, 8usize),
643 (2usize, DetailLevel::Rich, 6usize),
644 (2usize, DetailLevel::Compact, 6usize),
645 (2usize, DetailLevel::Minimal, 2usize),
646 ] {
647 candidates.push(Candidate {
648 rendered: render_single_result_candidate(
649 graph,
650 query,
651 &header,
652 node,
653 total_available,
654 depth,
655 detail,
656 edge_cap,
657 false,
658 debug_score,
659 ),
660 depth,
661 detail,
662 shown_nodes: 1 + depth,
663 });
664 }
665 pick_best_candidate(candidates, target)
666 .trim_end()
667 .to_owned()
668}
669
670fn render_multi_result_section(
671 graph: &GraphFile,
672 query: &str,
673 nodes: &[ScoredNode<'_>],
674 total_available: usize,
675 target: usize,
676 debug_score: bool,
677) -> String {
678 let visible_total = nodes.len();
679 let full = render_full_result_section(graph, query, nodes, total_available, debug_score);
680 if fits_target_chars(&full, target) {
681 return full;
682 }
683 let mut candidates = Vec::new();
684 let full_cap = visible_total;
685 let mid_cap = full_cap.min(5);
686 let low_cap = full_cap.min(3);
687
688 for (detail, edge_cap, result_cap, depth) in [
689 (DetailLevel::Rich, 4usize, full_cap.min(4), 0usize),
690 (DetailLevel::Compact, 3usize, full_cap, 0usize),
691 (DetailLevel::Rich, 2usize, mid_cap, 1usize),
692 (DetailLevel::Compact, 1usize, full_cap, 0usize),
693 (DetailLevel::Minimal, 1usize, mid_cap, 0usize),
694 (DetailLevel::Minimal, 0usize, low_cap, 0usize),
695 (DetailLevel::Minimal, 0usize, low_cap.min(2), 1usize),
696 ] {
697 let shown = result_cap.min(nodes.len());
698 let mut lines = vec![render_result_header(query, shown, total_available)];
699 for node in nodes.iter().take(shown) {
700 lines.extend(render_scored_node_candidate_lines(
701 graph,
702 query,
703 node,
704 0,
705 detail,
706 edge_cap,
707 debug_score,
708 ));
709 if depth > 0 {
710 lines.extend(render_neighbor_layers(graph, node.node, depth, detail));
711 }
712 }
713 if visible_total > shown {
714 lines.push(format!("... +{} more nodes omitted", visible_total - shown));
715 }
716 push_limit_omission_line(&mut lines, visible_total, total_available);
717 candidates.push(Candidate {
718 rendered: format!("{}\n", lines.join("\n")),
719 depth,
720 detail,
721 shown_nodes: shown,
722 });
723 }
724
725 pick_best_candidate(candidates, target)
726 .trim_end()
727 .to_owned()
728}
729
730fn pick_best_candidate(candidates: Vec<Candidate>, target: usize) -> String {
731 let lower = (target as f64 * 0.7) as usize;
732 let mut best: Option<(usize, usize, usize, usize, String)> = None;
733
734 for candidate in candidates {
735 let chars = candidate.rendered.chars().count();
736 let overshoot = chars.saturating_sub(target);
737 let undershoot = lower.saturating_sub(chars);
738 let penalty = overshoot.saturating_mul(10).saturating_add(undershoot);
739 let utility = candidate
740 .depth
741 .saturating_mul(100)
742 .saturating_add(candidate.shown_nodes.saturating_mul(5))
743 .saturating_add(candidate.detail.utility_bonus());
744
745 let entry = (
746 penalty,
747 overshoot,
748 usize::MAX - utility,
749 usize::MAX - chars,
750 candidate.rendered,
751 );
752 if best.as_ref().is_none_or(|current| {
753 entry.0 < current.0
754 || (entry.0 == current.0 && entry.1 < current.1)
755 || (entry.0 == current.0 && entry.1 == current.1 && entry.2 < current.2)
756 || (entry.0 == current.0
757 && entry.1 == current.1
758 && entry.2 == current.2
759 && entry.3 < current.3)
760 }) {
761 best = Some(entry);
762 }
763 }
764
765 best.map(|item| item.4).unwrap_or_else(|| "\n".to_owned())
766}
767
768fn render_full_result_section(
769 graph: &GraphFile,
770 query: &str,
771 nodes: &[ScoredNode<'_>],
772 total_available: usize,
773 debug_score: bool,
774) -> String {
775 let mut lines = vec![render_result_header(query, nodes.len(), total_available)];
776 for node in nodes {
777 lines.push(render_scored_node_block(
778 graph,
779 node,
780 true,
781 debug_score,
782 Some(query),
783 ));
784 }
785 push_limit_omission_line(&mut lines, nodes.len(), total_available);
786 lines.join("\n")
787}
788
789fn render_result_header(query: &str, shown: usize, total: usize) -> String {
790 let query = escape_cli_text(query);
791 if shown < total {
792 format!("? {query} ({shown}/{total})")
793 } else {
794 format!("? {query} ({total})")
795 }
796}
797
/// Appends a "... N more nodes omitted by limit" line when `total` exceeds
/// `shown`; otherwise leaves `lines` untouched.
fn push_limit_omission_line(lines: &mut Vec<String>, shown: usize, total: usize) {
    if total <= shown {
        return;
    }
    let omitted = total - shown;
    lines.push(format!("... {omitted} more nodes omitted by limit"));
}
804
/// Returns `true` when `rendered` has at most `target` characters
/// (Unicode scalar values, not bytes).
fn fits_target_chars(rendered: &str, target: usize) -> bool {
    // There is a character at index `target` exactly when the text is longer
    // than `target` characters.
    rendered.chars().nth(target).is_none()
}
808
809fn render_single_node_candidate(
810 graph: &GraphFile,
811 node: &Node,
812 depth: usize,
813 detail: DetailLevel,
814 edge_cap: usize,
815) -> String {
816 let lines = render_single_node_candidate_lines(graph, node, depth, detail, edge_cap, None);
817 format!("{}\n", lines.join("\n"))
818}
819
820fn render_single_result_candidate(
821 graph: &GraphFile,
822 query: &str,
823 header: &str,
824 node: &ScoredNode<'_>,
825 total_available: usize,
826 depth: usize,
827 detail: DetailLevel,
828 edge_cap: usize,
829 full: bool,
830 debug_score: bool,
831) -> String {
832 let mut lines = vec![header.to_owned()];
833 if full {
834 lines.push(render_scored_node_block(
835 graph,
836 node,
837 true,
838 debug_score,
839 Some(query),
840 ));
841 } else {
842 lines.extend(render_scored_node_candidate_lines(
843 graph,
844 query,
845 node,
846 depth,
847 detail,
848 edge_cap,
849 debug_score,
850 ));
851 }
852 push_limit_omission_line(&mut lines, 1, total_available);
853 format!("{}\n", lines.join("\n"))
854}
855
856fn render_single_node_candidate_lines(
857 graph: &GraphFile,
858 node: &Node,
859 depth: usize,
860 detail: DetailLevel,
861 edge_cap: usize,
862 query: Option<&str>,
863) -> Vec<String> {
864 let mut lines = render_node_lines_with_edges(graph, node, detail, edge_cap, query);
865 if depth > 0 {
866 lines.extend(render_neighbor_layers(graph, node, depth, detail));
867 }
868 lines
869}
870
871fn render_scored_node_candidate_lines(
872 graph: &GraphFile,
873 query: &str,
874 node: &ScoredNode<'_>,
875 depth: usize,
876 detail: DetailLevel,
877 edge_cap: usize,
878 debug_score: bool,
879) -> Vec<String> {
880 let mut lines = vec![format!("score: {}", node.score)];
881 if debug_score {
882 lines.push(render_score_debug_line(node));
883 }
884 lines.extend(render_single_node_candidate_lines(
885 graph,
886 node.node,
887 depth,
888 detail,
889 edge_cap,
890 Some(query),
891 ));
892 lines
893}
894
895fn render_scored_node_block(
896 graph: &GraphFile,
897 node: &ScoredNode<'_>,
898 full: bool,
899 debug_score: bool,
900 query: Option<&str>,
901) -> String {
902 if debug_score {
903 format!(
904 "score: {}\n{}\n{}",
905 node.score,
906 render_score_debug_line(node),
907 render_node_block_with_query(graph, node.node, full, query)
908 )
909 } else {
910 format!(
911 "score: {}\n{}",
912 node.score,
913 render_node_block_with_query(graph, node.node, full, query)
914 )
915 }
916}
917
918fn render_score_debug_line(node: &ScoredNode<'_>) -> String {
919 format!(
920 "score_debug: raw_relevance={:.3} normalized_relevance={} lexical_boost={} feedback_boost={} importance_boost={} authority_raw={} authority_applied={} authority_cap={}",
921 node.breakdown.raw_relevance,
922 node.breakdown.normalized_relevance,
923 node.breakdown.lexical_boost,
924 node.breakdown.feedback_boost,
925 node.breakdown.importance_boost,
926 node.breakdown.authority_raw,
927 node.breakdown.authority_applied,
928 node.breakdown.authority_cap,
929 )
930}
931
932fn render_neighbor_layers(
933 graph: &GraphFile,
934 root: &Node,
935 max_depth: usize,
936 detail: DetailLevel,
937) -> Vec<String> {
938 let mut out = Vec::new();
939 let mut seen: HashSet<String> = HashSet::from([root.id.clone()]);
940 let mut queue: VecDeque<(String, usize)> = VecDeque::from([(root.id.clone(), 0usize)]);
941 let mut layers: Vec<Vec<&Node>> = vec![Vec::new(); max_depth + 1];
942
943 while let Some((node_id, depth)) = queue.pop_front() {
944 if depth >= max_depth {
945 continue;
946 }
947 for incident in incident_edges(graph, &node_id) {
948 if seen.insert(incident.related.id.clone()) {
949 let next_depth = depth + 1;
950 if next_depth <= max_depth {
951 layers[next_depth].push(incident.related);
952 queue.push_back((incident.related.id.clone(), next_depth));
953 }
954 }
955 }
956 }
957
958 for depth in 1..=max_depth {
959 if layers[depth].is_empty() {
960 continue;
961 }
962 let cap = match detail {
963 DetailLevel::Rich => 6,
964 DetailLevel::Compact => 4,
965 DetailLevel::Minimal => 3,
966 };
967 let shown = layers[depth].len().min(cap);
968 out.push(format!(
969 "depth {depth}: {shown}/{} neighbors",
970 layers[depth].len()
971 ));
972 for node in layers[depth].iter().take(shown) {
973 out.extend(render_node_identity_lines(node, detail));
974 }
975 if layers[depth].len() > shown {
976 out.push(format!(
977 "... +{} more neighbors omitted",
978 layers[depth].len() - shown
979 ));
980 }
981 }
982
983 out
984}
985
986fn render_node_lines_with_edges(
987 graph: &GraphFile,
988 node: &Node,
989 detail: DetailLevel,
990 edge_cap: usize,
991 query: Option<&str>,
992) -> Vec<String> {
993 let mut lines = render_node_identity_lines(node, detail);
994 lines.extend(render_node_link_lines(graph, node, edge_cap, query));
995 lines
996}
997
998fn render_node_identity_lines(node: &Node, detail: DetailLevel) -> Vec<String> {
999 let mut lines = Vec::new();
1000 match detail {
1001 DetailLevel::Rich => {
1002 lines.push(format!(
1003 "# {} | {} [{}]",
1004 node.id,
1005 escape_cli_text(&node.name),
1006 node.r#type
1007 ));
1008 if !node.properties.alias.is_empty() {
1009 lines.push(format!(
1010 "aka: {}",
1011 node.properties
1012 .alias
1013 .iter()
1014 .map(|alias| escape_cli_text(alias))
1015 .collect::<Vec<_>>()
1016 .join(", ")
1017 ));
1018 }
1019 push_description_line(&mut lines, &node.properties.description, None);
1020 let shown_facts = node.properties.key_facts.len().min(3);
1021 for fact in node.properties.key_facts.iter().take(shown_facts) {
1022 lines.push(format!("- {}", escape_cli_text(fact)));
1023 }
1024 let omitted = node.properties.key_facts.len().saturating_sub(shown_facts);
1025 if omitted > 0 {
1026 lines.push(format!("... {omitted} more facts omitted"));
1027 }
1028 }
1029 DetailLevel::Compact => {
1030 lines.push(format!(
1031 "# {} | {} [{}]",
1032 node.id,
1033 escape_cli_text(&node.name),
1034 node.r#type
1035 ));
1036 push_description_line(&mut lines, &node.properties.description, Some(140));
1037 if let Some(fact) = node.properties.key_facts.first() {
1038 lines.push(format!("- {}", escape_cli_text(fact)));
1039 }
1040 }
1041 DetailLevel::Minimal => {
1042 lines.push(format!(
1043 "# {} | {} [{}]",
1044 node.id,
1045 escape_cli_text(&node.name),
1046 node.r#type
1047 ));
1048 }
1049 }
1050 lines
1051}
1052
1053fn render_node_link_lines(
1054 graph: &GraphFile,
1055 node: &Node,
1056 edge_cap: usize,
1057 query: Option<&str>,
1058) -> Vec<String> {
1059 let mut incident = incident_edges(graph, &node.id);
1060 if let Some(query) = query {
1061 let query_terms = text_norm::expand_query_terms(query);
1062 if !query_terms.is_empty() {
1063 incident.sort_by(|left, right| {
1064 let right_relevance = incident_edge_query_relevance(right, &query_terms);
1065 let left_relevance = incident_edge_query_relevance(left, &query_terms);
1066 right_relevance
1067 .cmp(&left_relevance)
1068 .then_with(|| incident_edge_default_cmp(left, right))
1069 });
1070 }
1071 }
1072 if incident.is_empty() {
1073 return Vec::new();
1074 }
1075
1076 let mut lines = Vec::new();
1077 if incident.len() > 12 {
1078 lines.push(format!("links: {} total", incident.len()));
1079 let (out_summary, in_summary) = summarize_relations(&incident);
1080 if !out_summary.is_empty() {
1081 lines.push(format!("out: {out_summary}"));
1082 }
1083 if !in_summary.is_empty() {
1084 lines.push(format!("in: {in_summary}"));
1085 }
1086 }
1087
1088 let shown = incident.len().min(edge_cap);
1089 for edge in incident.into_iter().take(shown) {
1090 let prefix = if edge.incoming { "<-" } else { "->" };
1091 lines.extend(render_edge_lines(prefix, edge.edge, edge.related, false));
1092 }
1093 if edge_cap > 0 && incident_count(graph, &node.id) > shown {
1094 lines.push(format!(
1095 "... {} more links omitted",
1096 incident_count(graph, &node.id) - shown
1097 ));
1098 }
1099 lines
1100}
1101
1102fn incident_count(graph: &GraphFile, node_id: &str) -> usize {
1103 graph
1104 .edges
1105 .iter()
1106 .filter(|edge| edge.source_id == node_id || edge.target_id == node_id)
1107 .count()
1108}
1109
/// An edge seen from one of its endpoints.
struct IncidentEdge<'a> {
    /// The underlying graph edge.
    edge: &'a Edge,
    /// The node at the other end of the edge.
    related: &'a Node,
    /// `true` when the edge points at the node of interest (rendered as `<-`),
    /// `false` when it starts there (rendered as `->`).
    incoming: bool,
}
1115
1116fn incident_edges<'a>(graph: &'a GraphFile, node_id: &str) -> Vec<IncidentEdge<'a>> {
1117 let mut edges = Vec::new();
1118 for edge in &graph.edges {
1119 if edge.source_id == node_id {
1120 if let Some(related) = graph.node_by_id(&edge.target_id) {
1121 edges.push(IncidentEdge {
1122 edge,
1123 related,
1124 incoming: false,
1125 });
1126 }
1127 } else if edge.target_id == node_id {
1128 if let Some(related) = graph.node_by_id(&edge.source_id) {
1129 edges.push(IncidentEdge {
1130 edge,
1131 related,
1132 incoming: true,
1133 });
1134 }
1135 }
1136 }
1137 edges.sort_by(incident_edge_default_cmp);
1138 edges
1139}
1140
1141fn incident_edge_default_cmp(
1142 left: &IncidentEdge<'_>,
1143 right: &IncidentEdge<'_>,
1144) -> std::cmp::Ordering {
1145 right
1146 .related
1147 .properties
1148 .importance
1149 .partial_cmp(&left.related.properties.importance)
1150 .unwrap_or(std::cmp::Ordering::Equal)
1151 .then_with(|| left.edge.relation.cmp(&right.edge.relation))
1152 .then_with(|| left.related.id.cmp(&right.related.id))
1153}
1154
1155fn incident_edge_query_relevance(edge: &IncidentEdge<'_>, query_terms: &[String]) -> i64 {
1156 if query_terms.is_empty() {
1157 return 0;
1158 }
1159 let related = edge.related;
1160 let mut score = 0;
1161 score += query_overlap_score(&related.id, query_terms, 6);
1162 score += query_overlap_score(&related.name, query_terms, 5);
1163 score += query_overlap_score(&related.properties.description, query_terms, 2);
1164 score += query_overlap_score(&edge.edge.relation, query_terms, 2);
1165 score += query_overlap_score(&edge.edge.properties.detail, query_terms, 2);
1166 for alias in &related.properties.alias {
1167 score += query_overlap_score(alias, query_terms, 4);
1168 }
1169 score
1170}
1171
1172fn query_overlap_score(value: &str, query_terms: &[String], weight: i64) -> i64 {
1173 if value.is_empty() || query_terms.is_empty() {
1174 return 0;
1175 }
1176 let value_terms: HashSet<String> = tokenize(value).into_iter().collect();
1177 if value_terms.is_empty() {
1178 return 0;
1179 }
1180 let matches = query_terms
1181 .iter()
1182 .filter(|term| value_terms.contains(term.as_str()))
1183 .count() as i64;
1184 matches * weight
1185}
1186
1187fn summarize_relations(edges: &[IncidentEdge<'_>]) -> (String, String) {
1188 let mut out: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1189 let mut incoming: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1190
1191 for edge in edges {
1192 let bucket = if edge.incoming {
1193 &mut incoming
1194 } else {
1195 &mut out
1196 };
1197 *bucket.entry(edge.edge.relation.clone()).or_insert(0) += 1;
1198 }
1199
1200 (join_relation_counts(&out), join_relation_counts(&incoming))
1201}
1202
/// Formats the first three entries of `counts` (in key order) as
/// `relation xN`, separated by `", "`. An empty map yields an empty string.
fn join_relation_counts(counts: &std::collections::BTreeMap<String, usize>) -> String {
    let mut summary = String::new();
    for (relation, count) in counts.iter().take(3) {
        if !summary.is_empty() {
            summary.push_str(", ");
        }
        summary.push_str(&format!("{relation} x{count}"));
    }
    summary
}
1211
1212fn render_node_block(graph: &GraphFile, node: &Node, full: bool) -> String {
1213 render_node_block_with_query(graph, node, full, None)
1214}
1215
/// Renders one node as a newline-joined block of CLI text lines.
///
/// The block starts with a `# id | name [type]` header, followed by aliases,
/// the description, key facts, and finally outgoing (`->`) and incoming (`<-`)
/// edges.
///
/// With `full == false` the output is abbreviated: the description is cut to
/// 200 characters, at most two key facts are listed (with a summary line for
/// the rest), and metadata, sources, feedback and attached notes are omitted.
///
/// `query` is forwarded to `outgoing_edges` / `incoming_edges`, which decide
/// the order (and, in the abbreviated form, the number) of edges shown.
fn render_node_block_with_query(
    graph: &GraphFile,
    node: &Node,
    full: bool,
    query: Option<&str>,
) -> String {
    let mut lines = Vec::new();
    // Header line: id, escaped display name and node type.
    lines.push(format!(
        "# {} | {} [{}]",
        node.id,
        escape_cli_text(&node.name),
        node.r#type
    ));

    if !node.properties.alias.is_empty() {
        lines.push(format!(
            "aka: {}",
            node.properties
                .alias
                .iter()
                .map(|alias| escape_cli_text(alias))
                .collect::<Vec<_>>()
                .join(", ")
        ));
    }
    // The description is only length-limited in the abbreviated view.
    push_description_line(
        &mut lines,
        &node.properties.description,
        if full { None } else { Some(200) },
    );
    // Metadata fields appear only in the full view; empty ones are skipped.
    if full {
        if !node.properties.domain_area.is_empty() {
            lines.push(format!(
                "domain_area: {}",
                escape_cli_text(&node.properties.domain_area)
            ));
        }
        if !node.properties.provenance.is_empty() {
            lines.push(format!(
                "provenance: {}",
                escape_cli_text(&node.properties.provenance)
            ));
        }
        if let Some(confidence) = node.properties.confidence {
            lines.push(format!("confidence: {confidence}"));
        }
        // Importance is always printed in the full view, whatever its value.
        lines.push(format!("importance: {}", node.properties.importance));
        if !node.properties.created_at.is_empty() {
            lines.push(format!("created_at: {}", node.properties.created_at));
        }
    }

    // Key facts: all of them in the full view, otherwise the first two.
    let facts_to_show = if full {
        node.properties.key_facts.len()
    } else {
        node.properties.key_facts.len().min(2)
    };
    for fact in node.properties.key_facts.iter().take(facts_to_show) {
        lines.push(format!("- {}", escape_cli_text(fact)));
    }
    // Tell the reader how many facts were left out of the abbreviated view.
    let omitted = node
        .properties
        .key_facts
        .len()
        .saturating_sub(facts_to_show);
    if omitted > 0 {
        lines.push(format!("... {omitted} more facts omitted"));
    }

    if full {
        if !node.source_files.is_empty() {
            lines.push(format!(
                "sources: {}",
                node.source_files
                    .iter()
                    .map(|source| escape_cli_text(source))
                    .collect::<Vec<_>>()
                    .join(", ")
            ));
        }
        // Node-level feedback lines carry no key prefix (`None`).
        push_feedback_lines(
            &mut lines,
            node.properties.feedback_score,
            node.properties.feedback_count,
            node.properties.feedback_last_ts_ms,
            None,
        );
    }

    // Notes are matched to the node by id with a linear scan over all notes;
    // they are only rendered in the full view.
    let attached_notes: Vec<_> = graph
        .notes
        .iter()
        .filter(|note| note.node_id == node.id)
        .collect();
    if full && !attached_notes.is_empty() {
        lines.push(format!("notes: {}", attached_notes.len()));
        for note in attached_notes {
            lines.extend(render_attached_note_lines(note));
        }
    }

    // Edges whose other endpoint cannot be resolved in the graph are skipped.
    for edge in outgoing_edges(graph, &node.id, full, query) {
        if let Some(target) = graph.node_by_id(&edge.target_id) {
            lines.extend(render_edge_lines("->", edge, target, full));
        }
    }
    for edge in incoming_edges(graph, &node.id, full, query) {
        if let Some(source) = graph.node_by_id(&edge.source_id) {
            lines.extend(render_edge_lines("<-", edge, source, full));
        }
    }

    lines.join("\n")
}
1330
1331fn outgoing_edges<'a>(
1332 graph: &'a GraphFile,
1333 node_id: &str,
1334 full: bool,
1335 query: Option<&str>,
1336) -> Vec<&'a Edge> {
1337 let mut edges: Vec<&Edge> = graph
1338 .edges
1339 .iter()
1340 .filter(|edge| edge.source_id == node_id)
1341 .collect();
1342 if let Some(query) = query {
1343 let query_terms = text_norm::expand_query_terms(query);
1344 if !query_terms.is_empty() {
1345 edges.sort_by(|left, right| {
1346 let right_score = directed_edge_query_relevance(graph, right, false, &query_terms);
1347 let left_score = directed_edge_query_relevance(graph, left, false, &query_terms);
1348 right_score
1349 .cmp(&left_score)
1350 .then_with(|| left.relation.cmp(&right.relation))
1351 .then_with(|| left.target_id.cmp(&right.target_id))
1352 });
1353 } else {
1354 edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1355 }
1356 } else {
1357 edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1358 }
1359 if !full {
1360 edges.truncate(3);
1361 }
1362 edges
1363}
1364
1365fn incoming_edges<'a>(
1366 graph: &'a GraphFile,
1367 node_id: &str,
1368 full: bool,
1369 query: Option<&str>,
1370) -> Vec<&'a Edge> {
1371 let mut edges: Vec<&Edge> = graph
1372 .edges
1373 .iter()
1374 .filter(|edge| edge.target_id == node_id)
1375 .collect();
1376 if let Some(query) = query {
1377 let query_terms = text_norm::expand_query_terms(query);
1378 if !query_terms.is_empty() {
1379 edges.sort_by(|left, right| {
1380 let right_score = directed_edge_query_relevance(graph, right, true, &query_terms);
1381 let left_score = directed_edge_query_relevance(graph, left, true, &query_terms);
1382 right_score
1383 .cmp(&left_score)
1384 .then_with(|| left.relation.cmp(&right.relation))
1385 .then_with(|| left.source_id.cmp(&right.source_id))
1386 });
1387 } else {
1388 edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1389 }
1390 } else {
1391 edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1392 }
1393 if !full {
1394 edges.truncate(3);
1395 }
1396 edges
1397}
1398
1399fn directed_edge_query_relevance(
1400 graph: &GraphFile,
1401 edge: &Edge,
1402 incoming: bool,
1403 query_terms: &[String],
1404) -> i64 {
1405 let related = if incoming {
1406 graph.node_by_id(&edge.source_id)
1407 } else {
1408 graph.node_by_id(&edge.target_id)
1409 };
1410 let mut score = query_overlap_score(&edge.relation, query_terms, 2)
1411 + query_overlap_score(&edge.properties.detail, query_terms, 2);
1412 if let Some(node) = related {
1413 score += query_overlap_score(&node.id, query_terms, 6);
1414 score += query_overlap_score(&node.name, query_terms, 5);
1415 score += query_overlap_score(&node.properties.description, query_terms, 2);
1416 for alias in &node.properties.alias {
1417 score += query_overlap_score(alias, query_terms, 4);
1418 }
1419 }
1420 score
1421}
1422
1423fn render_edge_lines(prefix: &str, edge: &Edge, related: &Node, full: bool) -> Vec<String> {
1424 let (arrow, relation) = if edge.relation.starts_with("NOT_") {
1425 (
1426 format!("{prefix}!"),
1427 edge.relation.trim_start_matches("NOT_"),
1428 )
1429 } else {
1430 (prefix.to_owned(), edge.relation.as_str())
1431 };
1432
1433 let mut line = format!(
1434 "{arrow} {relation} | {} | {}",
1435 related.id,
1436 escape_cli_text(&related.name)
1437 );
1438 if !edge.properties.detail.is_empty() {
1439 line.push_str(" | ");
1440 let detail = escape_cli_text(&edge.properties.detail);
1441 if full {
1442 line.push_str(&detail);
1443 } else {
1444 line.push_str(&truncate(&detail, 80));
1445 }
1446 }
1447 let mut lines = vec![line];
1448 if full {
1449 push_feedback_lines(
1450 &mut lines,
1451 edge.properties.feedback_score,
1452 edge.properties.feedback_count,
1453 edge.properties.feedback_last_ts_ms,
1454 Some("edge_"),
1455 );
1456 if !edge.properties.valid_from.is_empty() {
1457 lines.push(format!("edge_valid_from: {}", edge.properties.valid_from));
1458 }
1459 if !edge.properties.valid_to.is_empty() {
1460 lines.push(format!("edge_valid_to: {}", edge.properties.valid_to));
1461 }
1462 }
1463 lines
1464}
1465
/// Shortens `value` to at most `max_len` characters (Unicode scalar values).
///
/// Text that already fits is returned unchanged. Longer text is cut and ends
/// in `...`, with the ellipsis counted against `max_len`. When `max_len` is
/// smaller than the ellipsis itself, the result is just `max_len` dots so the
/// limit is never exceeded.
fn truncate(value: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // The byte length is an upper bound on the character count, so the cheap
    // check settles most inputs without walking the string.
    if value.len() <= max_len || value.chars().count() <= max_len {
        return value.to_owned();
    }
    if max_len < ELLIPSIS.len() {
        // ELLIPSIS is ASCII, so slicing by byte index is safe here.
        return ELLIPSIS[..max_len].to_owned();
    }

    let mut shortened: String = value.chars().take(max_len - ELLIPSIS.len()).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
1474
/// Escapes `value` so it stays on a single output line.
///
/// Backslash, newline, carriage return and tab are replaced by the two
/// character sequences `\\`, `\n`, `\r` and `\t`; every other character is
/// copied through unchanged.
fn escape_cli_text(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            plain => {
                escaped.push(plain);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
1488
1489fn push_description_line(lines: &mut Vec<String>, description: &str, max_len: Option<usize>) {
1490 if description.is_empty() {
1491 return;
1492 }
1493 let escaped = escape_cli_text(description);
1494 let rendered = match max_len {
1495 Some(limit) => truncate(&escaped, limit),
1496 None => escaped,
1497 };
1498 lines.push(format!("desc: {rendered}"));
1499}
1500
/// Appends the feedback fields that carry information to `lines`.
///
/// * `feedback_score` is emitted when `score` is not equal to zero,
/// * `feedback_count` when `count` is not zero,
/// * `feedback_last_ts_ms` when a timestamp is present.
///
/// Each key is preceded by `prefix` when one is given (no prefix otherwise).
fn push_feedback_lines(
    lines: &mut Vec<String>,
    score: f64,
    count: u64,
    last_ts_ms: Option<u64>,
    prefix: Option<&str>,
) {
    let label = prefix.unwrap_or_default();
    // Each expression yields zero or one line; `extend` appends it in order.
    lines.extend((score != 0.0).then(|| format!("{label}feedback_score: {score}")));
    lines.extend((count != 0).then(|| format!("{label}feedback_count: {count}")));
    lines.extend(last_ts_ms.map(|ts| format!("{label}feedback_last_ts_ms: {ts}")));
}
1519
1520fn render_attached_note_lines(note: &crate::graph::Note) -> Vec<String> {
1521 let mut lines = vec![format!("! {}", note.id)];
1522 if !note.body.is_empty() {
1523 lines.push(format!("note_body: {}", escape_cli_text(¬e.body)));
1524 }
1525 if !note.tags.is_empty() {
1526 lines.push(format!(
1527 "note_tags: {}",
1528 note.tags
1529 .iter()
1530 .map(|tag| escape_cli_text(tag))
1531 .collect::<Vec<_>>()
1532 .join(", ")
1533 ));
1534 }
1535 if !note.author.is_empty() {
1536 lines.push(format!("note_author: {}", escape_cli_text(¬e.author)));
1537 }
1538 if !note.created_at.is_empty() {
1539 lines.push(format!("note_created_at: {}", note.created_at));
1540 }
1541 if !note.provenance.is_empty() {
1542 lines.push(format!(
1543 "note_provenance: {}",
1544 escape_cli_text(¬e.provenance)
1545 ));
1546 }
1547 if !note.source_files.is_empty() {
1548 lines.push(format!(
1549 "note_sources: {}",
1550 note.source_files
1551 .iter()
1552 .map(|source| escape_cli_text(source))
1553 .collect::<Vec<_>>()
1554 .join(", ")
1555 ));
1556 }
1557 lines
1558}
1559
1560fn find_matches_with_index<'a>(
1561 graph: &'a GraphFile,
1562 query: &str,
1563 limit: usize,
1564 include_features: bool,
1565 include_metadata: bool,
1566 mode: FindMode,
1567 index: Option<&Bm25Index>,
1568 tune: Option<&FindTune>,
1569) -> Vec<ScoredNode<'a>> {
1570 let mut matches = find_all_matches_with_index(
1571 graph,
1572 query,
1573 include_features,
1574 include_metadata,
1575 mode,
1576 index,
1577 tune,
1578 );
1579 matches.truncate(limit);
1580 matches
1581}
1582
1583fn find_all_matches_with_index<'a>(
1584 graph: &'a GraphFile,
1585 query: &str,
1586 include_features: bool,
1587 include_metadata: bool,
1588 mode: FindMode,
1589 index: Option<&Bm25Index>,
1590 tune: Option<&FindTune>,
1591) -> Vec<ScoredNode<'a>> {
1592 let context = FindQueryContext::build(graph);
1593 let rewritten_query = rewrite_query(query);
1594 let fuzzy_query = if rewritten_query.is_empty() {
1595 query.to_owned()
1596 } else {
1597 rewritten_query
1598 };
1599 let mut scored: Vec<ScoredNode<'a>> = match mode {
1600 FindMode::Fuzzy => {
1601 let pattern = Pattern::parse(&fuzzy_query, CaseMatching::Ignore, Normalization::Smart);
1602 let mut matcher = Matcher::new(Config::DEFAULT);
1603 let candidates = graph
1604 .nodes
1605 .iter()
1606 .filter(|node| node_is_searchable(node, include_features, include_metadata))
1607 .filter_map(|node| {
1608 score_node(&context, node, &fuzzy_query, &pattern, &mut matcher).map(|score| {
1609 RawCandidate {
1610 node,
1611 raw_relevance: score as f64,
1612 lexical_boost: 0,
1613 }
1614 })
1615 })
1616 .collect();
1617 compose_scores(candidates)
1618 }
1619 FindMode::Bm25 => compose_scores(score_bm25_raw(
1620 graph,
1621 &context,
1622 &fuzzy_query,
1623 include_features,
1624 include_metadata,
1625 index,
1626 )),
1627 FindMode::Hybrid => compose_scores(score_hybrid_raw(
1628 graph,
1629 &context,
1630 &fuzzy_query,
1631 include_features,
1632 include_metadata,
1633 index,
1634 tune.copied().unwrap_or_default(),
1635 )),
1636 };
1637
1638 scored.sort_by(|left, right| {
1639 right
1640 .score
1641 .cmp(&left.score)
1642 .then_with(|| left.node.id.cmp(&right.node.id))
1643 });
1644 let mut seen_ids = HashSet::new();
1645 scored.retain(|item| {
1646 let key = crate::validate::normalize_node_id(&item.node.id).to_ascii_lowercase();
1647 seen_ids.insert(key)
1648 });
1649 scored
1650}
1651
1652fn compose_scores<'a>(candidates: Vec<RawCandidate<'a>>) -> Vec<ScoredNode<'a>> {
1653 let max_raw = candidates
1654 .iter()
1655 .map(|candidate| candidate.raw_relevance)
1656 .fold(0.0f64, f64::max);
1657 let max_raw_log = max_raw.ln_1p();
1658
1659 candidates
1660 .into_iter()
1661 .filter_map(|candidate| {
1662 if candidate.raw_relevance <= 0.0 {
1663 return None;
1664 }
1665 let normalized_relevance = if max_raw_log > 0.0 {
1666 ((candidate.raw_relevance.ln_1p() / max_raw_log) * 1000.0).round() as i64
1667 } else {
1668 0
1669 };
1670 let feedback = feedback_boost(candidate.node);
1671 let importance = importance_boost(candidate.node);
1672 let authority_raw = feedback + importance;
1673 let relative_cap =
1674 ((normalized_relevance as f64) * SCORE_META_MAX_RATIO).round() as i64;
1675 let authority_cap = relative_cap.max(SCORE_META_MIN_CAP).min(SCORE_META_MAX_CAP);
1676 let authority_applied = authority_raw.clamp(-authority_cap, authority_cap);
1677 let final_score = normalized_relevance + authority_applied;
1678
1679 Some(ScoredNode {
1680 score: final_score,
1681 node: candidate.node,
1682 breakdown: ScoreBreakdown {
1683 raw_relevance: candidate.raw_relevance,
1684 normalized_relevance,
1685 lexical_boost: candidate.lexical_boost,
1686 feedback_boost: feedback,
1687 importance_boost: importance,
1688 authority_raw,
1689 authority_applied,
1690 authority_cap,
1691 },
1692 })
1693 })
1694 .collect()
1695}
1696
1697fn feedback_boost(node: &Node) -> i64 {
1698 let count = node.properties.feedback_count as f64;
1699 if count <= 0.0 {
1700 return 0;
1701 }
1702 let avg = node.properties.feedback_score / count;
1703 let confidence = (count.ln_1p() / 3.0).min(1.0);
1704 let scaled = avg * 200.0 * confidence;
1705 scaled.clamp(-300.0, 300.0).round() as i64
1706}
1707
1708fn importance_boost(node: &Node) -> i64 {
1709 let normalized_importance = if (0.0..=1.0).contains(&node.properties.importance) {
1710 node.properties.importance
1711 } else if (1.0..=6.0).contains(&node.properties.importance) {
1712 (node.properties.importance - 1.0) / 5.0
1713 } else {
1714 node.properties.importance.clamp(0.0, 1.0)
1715 };
1716 let normalized = (normalized_importance - IMPORTANCE_NEUTRAL) * 2.0;
1717 (normalized * IMPORTANCE_MAX_ABS_BOOST).round() as i64
1718}
1719
/// Produces raw BM25-based candidates for `query`.
///
/// The query is expanded into terms with `text_norm::expand_query_terms`; no
/// terms means no candidates. Two paths follow:
///
/// * With a prebuilt `index`, the per-node scores come from `idx.search` and
///   IDF values from `idx.idf`.
/// * Without one, document frequencies, the average document length and the
///   IDF table are computed on the fly over all searchable nodes.
///
/// In both paths a node's own score is combined with the best score among its
/// neighbors (`combine_bm25_components`), candidates with a non-positive
/// combined score are dropped, and lexical and proximity boosts are added on
/// top of `base_score * 100`. The result is not sorted.
fn score_bm25_raw<'a>(
    graph: &'a GraphFile,
    context: &FindQueryContext<'a>,
    query: &str,
    include_features: bool,
    include_metadata: bool,
    index: Option<&Bm25Index>,
) -> Vec<RawCandidate<'a>> {
    let terms = text_norm::expand_query_terms(query);
    if terms.is_empty() {
        return Vec::new();
    }

    // Fast path: a prebuilt index supplies per-node scores and IDF values.
    if let Some(idx) = index {
        let results = idx.search(&terms, graph);
        return results
            .into_iter()
            .filter_map(|(node_id, score)| {
                // Skip index hits that no longer resolve to a node or that the
                // caller asked to exclude.
                let node = graph.node_by_id(&node_id)?;
                if !node_is_searchable(node, include_features, include_metadata) {
                    return None;
                }
                let self_terms = node_self_document_terms(context, node);
                let neighbor_score =
                    best_neighbor_bm25_score_with_index(context, node, &terms, idx);
                let base_score = combine_bm25_components(node, score as f64, neighbor_score);
                if base_score <= 0.0 {
                    return None;
                }
                // Terms missing from the index's IDF table count as IDF 0.
                let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
                    idx.idf.get(term).copied().unwrap_or(0.0) as f64
                });
                let proximity_boost = bm25_proximity_boost(context, node, &terms);
                Some(RawCandidate {
                    node,
                    raw_relevance: base_score * 100.0
                        + lexical_boost as f64
                        + proximity_boost as f64,
                    lexical_boost: lexical_boost + proximity_boost,
                })
            })
            .collect();
    }

    // Slow path: build the weighted token list of every searchable node.
    let docs: Vec<(&'a Node, Vec<String>)> = graph
        .nodes
        .iter()
        .filter(|node| node_is_searchable(node, include_features, include_metadata))
        .map(|node| (node, node_self_document_terms(context, node)))
        .collect();

    if docs.is_empty() {
        return Vec::new();
    }

    // Document frequency: number of documents containing each query term.
    let mut df: HashMap<String, usize> = HashMap::new();
    for term in &terms {
        let mut count = 0usize;
        for (_, tokens) in &docs {
            if tokens.iter().any(|t| t == term) {
                count += 1;
            }
        }
        df.insert(term.clone(), count);
    }

    let total_docs = docs.len() as f64;
    let avgdl = docs
        .iter()
        .map(|(_, tokens)| tokens.len() as f64)
        .sum::<f64>()
        / total_docs.max(1.0);

    // IDF in the `ln(1 + (N - df + 0.5) / (df + 0.5))` form, which stays
    // positive for every df between 0 and N.
    let mut idf_by_term: HashMap<String, f64> = HashMap::new();
    for term in &terms {
        let df_t = *df.get(term).unwrap_or(&0) as f64;
        let idf = (1.0 + (total_docs - df_t + 0.5) / (df_t + 0.5)).ln();
        idf_by_term.insert(term.clone(), idf);
    }

    let mut scored = Vec::new();

    for (node, self_terms) in docs {
        let self_score = bm25_document_score(&terms, &self_terms, &idf_by_term, avgdl);
        let neighbor_score = best_neighbor_bm25_score(context, node, &terms, &idf_by_term, avgdl);
        let base_score = combine_bm25_components(node, self_score, neighbor_score);
        if base_score <= 0.0 {
            continue;
        }
        let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
            idf_by_term.get(term).copied().unwrap_or(0.0)
        });
        let proximity_boost = bm25_proximity_boost(context, node, &terms);
        scored.push(RawCandidate {
            node,
            raw_relevance: base_score * 100.0 + lexical_boost as f64 + proximity_boost as f64,
            lexical_boost: lexical_boost + proximity_boost,
        });
    }

    scored
}
1822
/// Produces raw candidates by blending fuzzy and BM25 relevance.
///
/// Both scorers run over the searchable nodes, each score set is normalized to
/// `0.0..=1.0` with `normalize_raw_scores`, and the two are mixed as a
/// weighted average using `tune.fuzzy` and `tune.bm25`. Nodes whose blended
/// value is not positive are dropped; the rest get `raw_relevance` equal to
/// the blend times 1000 and inherit the BM25 lexical boost (0 when BM25 did
/// not score the node).
///
/// Only `tune.bm25` and `tune.fuzzy` are read here; `tune.vector` is not used
/// by this function.
fn score_hybrid_raw<'a>(
    graph: &'a GraphFile,
    context: &FindQueryContext<'a>,
    query: &str,
    include_features: bool,
    include_metadata: bool,
    index: Option<&Bm25Index>,
    tune: FindTune,
) -> Vec<RawCandidate<'a>> {
    let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
    let mut matcher = Matcher::new(Config::DEFAULT);

    // Fuzzy scores keyed by node id; nodes without a fuzzy match are absent.
    let mut fuzzy_raw = HashMap::new();
    for node in graph
        .nodes
        .iter()
        .filter(|node| node_is_searchable(node, include_features, include_metadata))
    {
        if let Some(score) = score_node(context, node, query, &pattern, &mut matcher) {
            fuzzy_raw.insert(node.id.as_str(), score as f64);
        }
    }

    let bm25_candidates = score_bm25_raw(
        graph,
        context,
        query,
        include_features,
        include_metadata,
        index,
    );
    // BM25 relevance and lexical boost, both keyed by node id.
    let mut bm25_raw = HashMap::new();
    let mut lexical_boost = HashMap::new();
    for candidate in bm25_candidates {
        bm25_raw.insert(candidate.node.id.as_str(), candidate.raw_relevance);
        lexical_boost.insert(candidate.node.id.as_str(), candidate.lexical_boost);
    }

    let fuzzy_norm = normalize_raw_scores(&fuzzy_raw);
    let bm25_norm = normalize_raw_scores(&bm25_raw);
    // Floor the divisor so weights summing to zero (or less) cannot divide by zero.
    let total_weight = (tune.bm25 + tune.fuzzy).max(0.0001);

    graph
        .nodes
        .iter()
        .filter(|node| node_is_searchable(node, include_features, include_metadata))
        .filter_map(|node| {
            // A node missing from one scorer contributes 0 for that scorer.
            let f = fuzzy_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
            let b = bm25_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
            let combined = ((tune.fuzzy * f) + (tune.bm25 * b)) / total_weight;
            if combined <= 0.0 {
                return None;
            }
            Some(RawCandidate {
                node,
                raw_relevance: combined * 1000.0,
                lexical_boost: lexical_boost.get(node.id.as_str()).copied().unwrap_or(0),
            })
        })
        .collect()
}
1884
/// Rescales raw scores onto `0.0..=1.0` on a logarithmic scale.
///
/// Each value becomes `ln(1 + value) / ln(1 + max)`, where `max` is the
/// largest value in the map (never taken to be below 0), clamped to
/// `0.0..=1.0`. If no value is positive, every entry maps to 0.
fn normalize_raw_scores<'a>(raw: &'a HashMap<&'a str, f64>) -> HashMap<&'a str, f64> {
    let peak_log = raw
        .values()
        .fold(0.0f64, |peak, &value| peak.max(value))
        .ln_1p();

    let mut normalized = HashMap::with_capacity(raw.len());
    for (&id, &value) in raw {
        let ratio = if peak_log > 0.0 {
            value.ln_1p() / peak_log
        } else {
            0.0
        };
        normalized.insert(id, ratio.clamp(0.0, 1.0));
    }
    normalized
}
1899
1900fn node_is_searchable(node: &Node, include_features: bool, include_metadata: bool) -> bool {
1901 (include_features || node.r#type != "Feature") && (include_metadata || node.r#type != "^")
1902}
1903
1904fn node_self_document_terms(context: &FindQueryContext<'_>, node: &Node) -> Vec<String> {
1905 let mut tokens = Vec::new();
1906 push_terms(&mut tokens, &node.id, BM25_ID_WEIGHT);
1907 push_terms(&mut tokens, &node.name, BM25_NAME_WEIGHT);
1908 push_terms(
1909 &mut tokens,
1910 &node.properties.description,
1911 BM25_DESCRIPTION_WEIGHT,
1912 );
1913 for alias in &node.properties.alias {
1914 push_terms(&mut tokens, alias, BM25_ALIAS_WEIGHT);
1915 }
1916 for fact in &node.properties.key_facts {
1917 push_terms(&mut tokens, fact, BM25_FACT_WEIGHT);
1918 }
1919 for note in context.notes_for(&node.id) {
1920 push_terms(&mut tokens, ¬e.body, BM25_NOTE_BODY_WEIGHT);
1921 for tag in ¬e.tags {
1922 push_terms(&mut tokens, tag, BM25_NOTE_TAG_WEIGHT);
1923 }
1924 }
1925 tokens
1926}
1927
1928fn neighbor_document_terms(neighbor: &Node) -> Vec<String> {
1929 let mut tokens = Vec::new();
1930 push_terms(&mut tokens, &neighbor.id, BM25_NEIGHBOR_WEIGHT);
1931 push_terms(&mut tokens, &neighbor.name, BM25_NEIGHBOR_WEIGHT);
1932 push_terms(
1933 &mut tokens,
1934 &neighbor.properties.description,
1935 BM25_NEIGHBOR_WEIGHT,
1936 );
1937 for alias in &neighbor.properties.alias {
1938 push_terms(&mut tokens, alias, BM25_NEIGHBOR_WEIGHT);
1939 }
1940 tokens
1941}
1942
1943fn fact_volume_normalizer(node: &Node) -> f64 {
1944 let fact_chars = node
1945 .properties
1946 .key_facts
1947 .iter()
1948 .map(|fact| fact.chars().count())
1949 .sum::<usize>() as f64;
1950 if fact_chars <= 0.0 {
1951 return 1.0;
1952 }
1953 let scaled = FACT_VOLUME_BASE_CHARS.sqrt() / fact_chars.sqrt();
1954 scaled.clamp(FACT_VOLUME_MIN_FACTOR, 1.0)
1955}
1956
1957fn bm25_document_score(
1958 query_terms: &[String],
1959 document_terms: &[String],
1960 idf_by_term: &HashMap<String, f64>,
1961 avgdl: f64,
1962) -> f64 {
1963 if query_terms.is_empty() || document_terms.is_empty() {
1964 return 0.0;
1965 }
1966 let dl = document_terms.len() as f64;
1967 if dl <= 0.0 {
1968 return 0.0;
1969 }
1970 let mut score = 0.0;
1971 for term in query_terms {
1972 let tf = document_terms.iter().filter(|token| *token == term).count() as f64;
1973 if tf <= 0.0 {
1974 continue;
1975 }
1976 let idf = idf_by_term.get(term).copied().unwrap_or(0.0);
1977 if idf <= 0.0 {
1978 continue;
1979 }
1980 let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
1981 score += idf * (tf * (BM25_K1 + 1.0) / denom);
1982 }
1983 score
1984}
1985
1986fn best_neighbor_bm25_score(
1987 context: &FindQueryContext<'_>,
1988 node: &Node,
1989 query_terms: &[String],
1990 idf_by_term: &HashMap<String, f64>,
1991 avgdl: f64,
1992) -> f64 {
1993 context
1994 .neighbors_for(&node.id)
1995 .iter()
1996 .map(|neighbor| {
1997 let neighbor_terms = neighbor_document_terms(neighbor);
1998 bm25_document_score(query_terms, &neighbor_terms, idf_by_term, avgdl)
1999 })
2000 .fold(0.0f64, f64::max)
2001}
2002
2003fn best_neighbor_bm25_score_with_index(
2004 context: &FindQueryContext<'_>,
2005 node: &Node,
2006 query_terms: &[String],
2007 index: &Bm25Index,
2008) -> f64 {
2009 let avgdl = index.avg_doc_len as f64;
2010 context
2011 .neighbors_for(&node.id)
2012 .iter()
2013 .map(|neighbor| {
2014 let neighbor_terms = neighbor_document_terms(neighbor);
2015 let dl = neighbor_terms.len() as f64;
2016 if dl <= 0.0 {
2017 return 0.0;
2018 }
2019 let mut score = 0.0;
2020 for term in query_terms {
2021 let idf = index.idf.get(term).copied().unwrap_or(0.0) as f64;
2022 if idf <= 0.0 {
2023 continue;
2024 }
2025 let tf = neighbor_terms.iter().filter(|token| *token == term).count() as f64;
2026 if tf <= 0.0 {
2027 continue;
2028 }
2029 let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
2030 score += idf * (tf * (BM25_K1 + 1.0) / denom);
2031 }
2032 score
2033 })
2034 .fold(0.0f64, f64::max)
2035}
2036
2037fn combine_bm25_components(node: &Node, self_score: f64, neighbor_score: f64) -> f64 {
2038 let combined =
2039 BM25_SELF_CONTEXT_WEIGHT * self_score + BM25_NEIGHBOR_CONTEXT_WEIGHT * neighbor_score;
2040 combined * fact_volume_normalizer(node)
2041}
2042
2043fn push_terms(target: &mut Vec<String>, value: &str, weight: usize) {
2044 if value.is_empty() {
2045 return;
2046 }
2047 let terms = tokenize(value);
2048 for _ in 0..weight {
2049 target.extend(terms.iter().cloned());
2050 }
2051}
2052
/// Splits `text` into search tokens.
///
/// Thin wrapper around `text_norm::tokenize`, so this module has a single
/// local entry point for tokenization.
fn tokenize(text: &str) -> Vec<String> {
    text_norm::tokenize(text)
}
2056
/// Rewrites `query` as its expanded terms joined by single spaces.
///
/// The terms come from `text_norm::expand_query_terms`; when that yields no
/// terms the result is an empty string.
fn rewrite_query(query: &str) -> String {
    text_norm::expand_query_terms(query).join(" ")
}
2060
2061fn bm25_lexical_boost_with_idf<F>(
2062 query_terms: &[String],
2063 document_terms: &[String],
2064 idf_for: F,
2065) -> i64
2066where
2067 F: Fn(&str) -> f64,
2068{
2069 if query_terms.is_empty() || document_terms.is_empty() {
2070 return 0;
2071 }
2072 if query_terms.len() > 1 && contains_token_phrase(document_terms, query_terms) {
2073 return BM25_PHRASE_MATCH_BOOST;
2074 }
2075 let document_vocab: HashSet<&str> = document_terms.iter().map(String::as_str).collect();
2076 let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2077 let mut total_idf = 0.0;
2078 let mut matched_idf = 0.0;
2079 let mut matched_terms = 0i64;
2080 for term in query_vocab {
2081 let idf = idf_for(term).max(0.0);
2082 total_idf += if idf > 0.0 { idf } else { 1.0 };
2083 if document_vocab.contains(term) {
2084 matched_terms += 1;
2085 matched_idf += if idf > 0.0 { idf } else { 1.0 };
2086 }
2087 }
2088 if matched_terms == 0 {
2089 return 0;
2090 }
2091 ((matched_idf / total_idf.max(1.0)) * BM25_TOKEN_MATCH_BOOST as f64).round() as i64
2092}
2093
2094fn bm25_proximity_boost(
2095 context: &FindQueryContext<'_>,
2096 node: &Node,
2097 query_terms: &[String],
2098) -> i64 {
2099 if query_terms.len() < 2 {
2100 return 0;
2101 }
2102 let mut best_span_hits = proximity_hits_in_text(&node.id, query_terms)
2103 .max(proximity_hits_in_text(&node.name, query_terms))
2104 .max(proximity_hits_in_text(
2105 &node.properties.description,
2106 query_terms,
2107 ));
2108 for alias in &node.properties.alias {
2109 best_span_hits = best_span_hits.max(proximity_hits_in_text(alias, query_terms));
2110 }
2111 for fact in &node.properties.key_facts {
2112 best_span_hits = best_span_hits.max(proximity_hits_in_text(fact, query_terms));
2113 }
2114 for note in context.notes_for(&node.id) {
2115 best_span_hits = best_span_hits.max(proximity_hits_in_text(¬e.body, query_terms));
2116 for tag in ¬e.tags {
2117 best_span_hits = best_span_hits.max(proximity_hits_in_text(tag, query_terms));
2118 }
2119 }
2120 if best_span_hits < 2 {
2121 0
2122 } else {
2123 BM25_PROXIMITY_MATCH_BOOST + (best_span_hits as i64 - 2) * 20
2124 }
2125}
2126
2127fn proximity_hits_in_text(value: &str, query_terms: &[String]) -> usize {
2128 if value.is_empty() || query_terms.len() < 2 {
2129 return 0;
2130 }
2131 let tokens = tokenize(value);
2132 if tokens.len() < 2 {
2133 return 0;
2134 }
2135 let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2136 let mut best = 0usize;
2137 for start in 0..tokens.len() {
2138 let end = (start + BM25_PROXIMITY_WINDOW_TOKENS).min(tokens.len());
2139 let mut seen: HashSet<&str> = HashSet::new();
2140 for token in &tokens[start..end] {
2141 if query_vocab.contains(token.as_str()) {
2142 seen.insert(token.as_str());
2143 }
2144 }
2145 best = best.max(seen.len());
2146 }
2147 best
2148}
2149
/// Reports whether `query_terms` occurs in `document_terms` as a contiguous,
/// in-order run of tokens.
///
/// An empty query, or a query longer than the document, never matches.
fn contains_token_phrase(document_terms: &[String], query_terms: &[String]) -> bool {
    let phrase_len = query_terms.len();
    if phrase_len == 0 || document_terms.len() < phrase_len {
        return false;
    }
    let last_start = document_terms.len() - phrase_len;
    (0..=last_start).any(|start| document_terms[start..start + phrase_len] == *query_terms)
}
2158
/// Computes the fuzzy relevance of `node` for `query`.
///
/// The score has two parts:
///
/// * a *primary* part from the id (weight 5), the name (weight 4) and every
///   alias (weight 4), scored with `score_primary_field`;
/// * a *contextual* part from the description, key facts (damped by
///   `fact_volume_normalizer`), attached notes and the best-matching neighbor.
///
/// When no primary field matched at all, the contextual part is divided by
/// `FUZZY_NO_PRIMARY_CONTEXT_DIVISOR`. Returns `None` when the total is 0.
fn score_node(
    context: &FindQueryContext<'_>,
    node: &Node,
    query: &str,
    pattern: &Pattern,
    matcher: &mut Matcher,
) -> Option<u32> {
    let mut primary_score = 0;
    // Counts how many primary fields produced a non-zero score.
    let mut primary_hits = 0;

    let id_score = score_primary_field(query, pattern, matcher, &node.id, 5);
    if id_score > 0 {
        primary_hits += 1;
    }
    primary_score += id_score;

    let name_score = score_primary_field(query, pattern, matcher, &node.name, 4);
    if name_score > 0 {
        primary_hits += 1;
    }
    primary_score += name_score;

    for alias in &node.properties.alias {
        let alias_score = score_primary_field(query, pattern, matcher, alias, 4);
        if alias_score > 0 {
            primary_hits += 1;
        }
        primary_score += alias_score;
    }

    let mut contextual_score = score_secondary_field(
        query,
        pattern,
        matcher,
        &node.properties.description,
        FUZZY_DESCRIPTION_WEIGHT,
    );
    let mut facts_score = 0;
    for fact in &node.properties.key_facts {
        facts_score += score_secondary_field(query, pattern, matcher, fact, FUZZY_FACT_WEIGHT);
    }
    // Damp the summed fact score according to how much fact text the node has.
    let facts_factor = fact_volume_normalizer(node);
    contextual_score += ((facts_score as f64) * facts_factor).round() as u32;
    contextual_score += score_notes_context(context, node, query, pattern, matcher);

    // Neighbor context is capped first and then scaled down before it is added.
    let neighbor_context = score_neighbor_context(context, node, query, pattern, matcher)
        .min(FUZZY_NEIGHBOR_CONTEXT_CAP);
    contextual_score += neighbor_context / FUZZY_NEIGHBOR_CONTEXT_DIVISOR;

    // Context alone, without any id/name/alias hit, counts for less.
    if primary_hits == 0 {
        contextual_score /= FUZZY_NO_PRIMARY_CONTEXT_DIVISOR;
    }

    let total = primary_score + contextual_score;
    (total > 0).then_some(total)
}
2215
2216fn score_notes_context(
2217 context: &FindQueryContext<'_>,
2218 node: &Node,
2219 query: &str,
2220 pattern: &Pattern,
2221 matcher: &mut Matcher,
2222) -> u32 {
2223 let mut total = 0;
2224 for note in context.notes_for(&node.id) {
2225 total += score_secondary_field(query, pattern, matcher, ¬e.body, FUZZY_NOTE_BODY_WEIGHT);
2226 for tag in ¬e.tags {
2227 total += score_secondary_field(query, pattern, matcher, tag, FUZZY_NOTE_TAG_WEIGHT);
2228 }
2229 }
2230 total
2231}
2232
2233fn score_neighbor_context(
2234 context: &FindQueryContext<'_>,
2235 node: &Node,
2236 query: &str,
2237 pattern: &Pattern,
2238 matcher: &mut Matcher,
2239) -> u32 {
2240 let mut best = 0;
2241
2242 for neighbor in context.neighbors_for(&node.id) {
2243 let mut score = score_secondary_field(query, pattern, matcher, &neighbor.id, 1)
2244 + score_secondary_field(query, pattern, matcher, &neighbor.name, 1)
2245 + score_secondary_field(query, pattern, matcher, &neighbor.properties.description, 1);
2246
2247 for alias in &neighbor.properties.alias {
2248 score += score_secondary_field(query, pattern, matcher, alias, 1);
2249 }
2250
2251 best = best.max(score);
2252 }
2253
2254 best
2255}
2256
2257fn score_field(pattern: &Pattern, matcher: &mut Matcher, value: &str) -> Option<u32> {
2258 if value.is_empty() {
2259 return None;
2260 }
2261 let mut buf = Vec::new();
2262 let haystack = Utf32Str::new(value, &mut buf);
2263 pattern.score(haystack, matcher)
2264}
2265
2266fn score_primary_field(
2267 query: &str,
2268 pattern: &Pattern,
2269 matcher: &mut Matcher,
2270 value: &str,
2271 weight: u32,
2272) -> u32 {
2273 let bonus = textual_bonus(query, value);
2274 let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2275 if bonus == 0 && fuzzy == 0 {
2276 return 0;
2277 }
2278 (fuzzy + bonus) * weight
2279}
2280
2281fn score_secondary_field(
2282 query: &str,
2283 pattern: &Pattern,
2284 matcher: &mut Matcher,
2285 value: &str,
2286 weight: u32,
2287) -> u32 {
2288 let bonus = textual_bonus(query, value);
2289 let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2290 if bonus == 0 && fuzzy == 0 {
2291 return 0;
2292 }
2293 (fuzzy + bonus / 2) * weight
2294}
2295
2296fn textual_bonus(query: &str, value: &str) -> u32 {
2297 let query = query.trim().to_lowercase();
2298 let value = value.to_lowercase();
2299
2300 if value == query {
2301 return 400;
2302 }
2303 if value.contains(&query) {
2304 return 200;
2305 }
2306
2307 query
2308 .split_whitespace()
2309 .map(|token| {
2310 if value.contains(token) {
2311 80
2312 } else if is_subsequence(token, &value) {
2313 40
2314 } else {
2315 0
2316 }
2317 })
2318 .sum()
2319}
2320
/// Reports whether the characters of `needle` appear in `haystack` in the same
/// order, not necessarily next to each other.
///
/// An empty `needle` is never considered a subsequence.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    if needle.is_empty() {
        return false;
    }
    // `any` consumes the haystack up to and including each match, so every
    // needle character is searched for only after the previous one.
    let mut remaining = haystack.chars();
    needle
        .chars()
        .all(|wanted| remaining.any(|ch| ch == wanted))
}
2343
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Concept` node from the given arguments.
    ///
    /// Properties not passed in keep their `Default` value and
    /// `source_files` is left empty.
    fn make_node(
        id: &str,
        name: &str,
        description: &str,
        key_facts: &[&str],
        alias: &[&str],
        importance: f64,
        feedback_score: f64,
        feedback_count: u64,
    ) -> Node {
        let mut props = crate::graph::NodeProperties::default();
        props.description = String::from(description);
        props.key_facts = key_facts.iter().copied().map(String::from).collect();
        props.alias = alias.iter().copied().map(String::from).collect();
        props.importance = importance;
        props.feedback_score = feedback_score;
        props.feedback_count = feedback_count;

        Node {
            id: String::from(id),
            r#type: String::from("Concept"),
            name: String::from(name),
            properties: props,
            source_files: Vec::new(),
        }
    }

    /// Builds an edge from `source_id` to `target_id` with default properties.
    fn make_edge(source_id: &str, relation: &str, target_id: &str) -> Edge {
        let properties = crate::graph::EdgeProperties::default();
        Edge {
            source_id: String::from(source_id),
            relation: String::from(relation),
            target_id: String::from(target_id),
            properties,
        }
    }

    /// Returns the score of the result whose node id is `id`.
    ///
    /// Panics with "score for node" when the id is not among the results.
    fn score_for(results: &[ScoredNode<'_>], id: &str) -> i64 {
        let hit = results.iter().find(|candidate| candidate.node.id == id);
        hit.expect("score for node").score
    }

    /// Pins the exact bonus returned for each match tier of `textual_bonus`.
    #[test]
    fn textual_bonus_tiers_are_stable() {
        // (query, value, expected bonus)
        let tiers = [
            ("abc", "abc", 400),
            ("abc", "xxabcxx", 200),
            ("abc def", "aa abc and def zz", 160),
            ("abc", "aXbYc", 40),
            ("abc", "zzz", 0),
        ];
        for (query, value, expected) in tiers {
            assert_eq!(textual_bonus(query, value), expected);
        }
    }

    /// Non-ASCII upper-case input is lowercased and `-` splits tokens.
    #[test]
    fn tokenize_handles_unicode_casefolding() {
        let expected = vec!["łódź", "smart", "home"];
        assert_eq!(tokenize("ŁÓDŹ smart-home"), expected);
    }

    /// With a constant IDF of 1.0 a contiguous phrase earns 120, scattered
    /// tokens earn 45 and a document without any query token earns 0.
    #[test]
    fn bm25_lexical_boost_prefers_phrase_then_tokens() {
        let query_terms = tokenize("smart home api");
        let boost_for = |document: &str| {
            bm25_lexical_boost_with_idf(&query_terms, &tokenize(document), |_| 1.0)
        };

        assert_eq!(boost_for("x smart home api y"), 120);
        assert_eq!(boost_for("smart x api y home"), 45);
        assert_eq!(boost_for("nothing here"), 0);
    }

    /// A node whose id and name do not contain the query still scores through
    /// its key facts and attached note; the same node without any such content
    /// yields no score at all.
    #[test]
    fn score_node_uses_key_facts_and_notes_without_primary_match() {
        let query = "oauth2 producenta";
        let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);

        let gateway = make_node(
            "concept:gateway",
            "Gateway",
            "",
            &["Autentykacja OAuth2 przez konto producenta"],
            &[],
            0.5,
            0.0,
            0,
        );
        let mut graph = GraphFile::new("test");
        graph.nodes.push(gateway.clone());
        graph.notes.push(crate::graph::Note {
            id: String::from("note:oauth"),
            node_id: gateway.id.clone(),
            body: String::from("Token refresh przez OAuth2"),
            tags: vec![String::from("oauth2")],
            ..Default::default()
        });

        let context = FindQueryContext::build(&graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let score = score_node(&context, &gateway, query, &pattern, &mut matcher);
        assert!(matches!(score, Some(value) if value > 0));

        // Same id and name, but no key facts and no notes in the graph.
        let bare_graph = GraphFile::new("empty");
        let bare_node = make_node("concept:gateway", "Gateway", "", &[], &[], 0.5, 0.0, 0);
        let bare_context = FindQueryContext::build(&bare_graph);
        let mut fresh_matcher = Matcher::new(Config::DEFAULT);
        let bare_score = score_node(
            &bare_context,
            &bare_node,
            query,
            &pattern,
            &mut fresh_matcher,
        );
        assert!(bare_score.is_none());
    }

    /// Two nodes with identical descriptions differ only in importance; the
    /// more important one must end up with the higher BM25 score.
    #[test]
    fn score_bm25_respects_importance_boost_for_equal_documents() {
        let mut graph = GraphFile::new("test");
        // (id, name, importance)
        let nodes = [("concept:high", "High", 1.0), ("concept:low", "Low", 0.0)];
        for (id, name, importance) in nodes {
            graph.nodes.push(make_node(
                id,
                name,
                "smart home api",
                &[],
                &[],
                importance,
                0.0,
                0,
            ));
        }

        let results = find_all_matches_with_index(
            &graph,
            "smart home api",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );

        assert!(score_for(&results, "concept:high") > score_for(&results, "concept:low"));
    }

    /// The hub node only reaches the query terms through its neighbor, so it
    /// must be listed but rank below the node that matches on its own text.
    #[test]
    fn bm25_prefers_self_match_over_neighbor_only_match() {
        let self_hit = make_node(
            "concept:self_hit",
            "Batch plugin output directory",
            "",
            &["BatchPlugin OUTPUT_DIR rule in WebLogic path"],
            &[],
            0.5,
            0.0,
            0,
        );
        let hub = make_node(
            "concept:hub",
            "Integration Hub",
            "gateway for many systems",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );
        let neighbor_hit = make_node(
            "concept:neighbor_hit",
            "BatchPlugin OUTPUT_DIR in WebLogic",
            "",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );

        let mut graph = GraphFile::new("test");
        graph.nodes.push(self_hit);
        graph.nodes.push(hub);
        graph.nodes.push(neighbor_hit);
        graph
            .edges
            .push(make_edge("concept:hub", "HAS", "concept:neighbor_hit"));

        let results = find_all_matches_with_index(
            &graph,
            "BatchPlugin OUTPUT_DIR WebLogic",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );

        let hub_listed = results.iter().any(|item| item.node.id == "concept:hub");
        assert!(hub_listed);
        assert!(score_for(&results, "concept:self_hit") > score_for(&results, "concept:hub"));
    }

    /// The relevant neighbor is added last and has the lowest importance, yet
    /// its edge must be rendered first when a query is supplied.
    #[test]
    fn link_rendering_sorts_incident_edges_by_query_relevance() {
        let mut graph = GraphFile::new("test");

        // (id, name, importance)
        let nodes = [
            ("concept:center", "Center", 0.5),
            ("concept:relevant", "Push notification template", 0.2),
            ("concept:irrelevant_a", "Billing ledger", 0.9),
            ("concept:irrelevant_b", "Audit trail", 0.8),
        ];
        for (id, name, importance) in nodes {
            graph
                .nodes
                .push(make_node(id, name, "", &[], &[], importance, 0.0, 0));
        }

        let targets = [
            "concept:irrelevant_a",
            "concept:irrelevant_b",
            "concept:relevant",
        ];
        for target in targets {
            graph
                .edges
                .push(make_edge("concept:center", "HAS", target));
        }

        let center = graph.node_by_id("concept:center").expect("center node");
        let lines = render_node_link_lines(&graph, center, 2, Some("push notification template"));

        let first_edge = lines
            .iter()
            .find(|line| line.starts_with("-> "))
            .expect("first edge line");
        assert!(first_edge.contains("concept:relevant"));
    }

    /// For the weakly relevant candidate the raw authority boost exceeds the
    /// cap, and the applied boost equals the cap.
    #[test]
    fn final_score_caps_authority_boost_for_weak_relevance() {
        let weak = make_node(
            "concept:weak",
            "Weak",
            "smart home api",
            &[],
            &[],
            1.0,
            300.0,
            1,
        );
        let strong = make_node(
            "concept:strong",
            "Strong",
            "smart home api smart home api smart home api smart home api",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );

        let scored = compose_scores(vec![
            RawCandidate {
                node: &weak,
                raw_relevance: 12.0,
                lexical_boost: 0,
            },
            RawCandidate {
                node: &strong,
                raw_relevance: 100.0,
                lexical_boost: 0,
            },
        ]);

        let breakdown = &scored
            .iter()
            .find(|item| item.node.id == "concept:weak")
            .expect("weak node")
            .breakdown;
        assert_eq!(breakdown.authority_applied, breakdown.authority_cap);
        assert!(breakdown.authority_raw > breakdown.authority_cap);
    }

    /// Pins the boost values at the extremes of importance and for a few
    /// feedback scores.
    #[test]
    fn importance_and_feedback_boost_have_expected_ranges() {
        // (id, name, importance, expected boost)
        let importance_cases = [
            ("concept:high", "High", 1.0, 66),
            ("concept:low", "Low", 0.0, -66),
        ];
        for (id, name, importance, expected) in importance_cases {
            let node = make_node(id, name, "", &[], &[], importance, 0.0, 0);
            assert_eq!(importance_boost(&node), expected);
        }

        // (id, name, feedback score, expected boost); feedback count is always 1.
        let feedback_cases = [
            ("concept:pos", "Pos", 1.0, 46),
            ("concept:neg", "Neg", -2.0, -92),
            ("concept:sat", "Sat", 300.0, 300),
        ];
        for (id, name, feedback_score, expected) in feedback_cases {
            let node = make_node(id, name, "", &[], &[], 0.5, feedback_score, 1);
            assert_eq!(feedback_boost(&node), expected);
        }
    }

    /// Two nodes sharing one id must produce a single result entry.
    #[test]
    fn find_deduplicates_results_by_node_id_for_single_query() {
        let first = make_node(
            "concept:rule",
            "Business Rule",
            "Rule for billing decisions",
            &["Business rule validation"],
            &["billing rule"],
            0.5,
            0.0,
            0,
        );
        let duplicate = make_node(
            "concept:rule",
            "Business Rule Duplicate",
            "Duplicate record with same id",
            &["Business rule duplicate"],
            &[],
            0.5,
            0.0,
            0,
        );

        let mut graph = GraphFile::new("test");
        graph.nodes.push(first);
        graph.nodes.push(duplicate);

        let results = find_all_matches_with_index(
            &graph,
            "business rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );

        assert_eq!(
            results
                .iter()
                .filter(|item| item.node.id == "concept:rule")
                .count(),
            1
        );
    }

    /// Changing only the `vector` weight of the tuning must leave the hybrid
    /// score untouched.
    #[test]
    fn hybrid_score_does_not_change_when_only_vector_weight_changes() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:auth",
            "Authentication Rule",
            "Business rule for authentication",
            &["auth rule"],
            &["login policy"],
            0.5,
            0.0,
            0,
        ));

        let tune_with_vector = FindTune {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 1.0,
        };
        let tune_without_vector = FindTune {
            vector: 0.0,
            ..tune_with_vector
        };

        let with_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&tune_with_vector),
        );
        let no_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&tune_without_vector),
        );

        assert_eq!(with_vector.len(), 1);
        assert_eq!(no_vector.len(), 1);
        assert_eq!(with_vector[0].score, no_vector[0].score);
    }

    /// A node of type `^` is absent from the results by default and present
    /// once the fourth search argument is switched on.
    #[test]
    fn find_hides_metadata_nodes_unless_enabled() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "^:graph_info",
            "Graph Metadata",
            "Internal metadata",
            &["graph_uuid=abc123"],
            &[],
            0.5,
            0.0,
            0,
        ));
        // Retag the first node carrying the metadata id.
        for node in graph.nodes.iter_mut() {
            if node.id == "^:graph_info" {
                node.r#type = String::from("^");
                break;
            }
        }

        let search = |include_metadata: bool| {
            find_all_matches_with_index(
                &graph,
                "graph uuid",
                true,
                include_metadata,
                FindMode::Hybrid,
                None,
                None,
            )
        };

        let hidden = search(false);
        assert!(hidden.is_empty());

        let shown = search(true);
        assert_eq!(shown.len(), 1);
        assert_eq!(shown[0].node.id, "^:graph_info");
    }
}