1use std::collections::{HashMap, HashSet, VecDeque};
2
3use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
4use nucleo_matcher::{Config, Matcher, Utf32Str};
5
6use crate::graph::{Edge, GraphFile, Node, Note};
7use crate::index::Bm25Index;
8use crate::text_norm;
9
// BM25 parameters.
// NOTE(review): presumably the usual k1 (term-frequency saturation) and b (length
// normalisation) knobs; they are not referenced in the part of the file reviewed here — confirm.
const BM25_K1: f64 = 1.5;
const BM25_B: f64 = 0.75;
// Character budget for adaptive rendering: the default used when the caller passes no
// target, and the bounds every requested target is clamped into (see `clamp_target_chars`).
const DEFAULT_TARGET_CHARS: usize = 4200;
const MIN_TARGET_CHARS: usize = 300;
const MAX_TARGET_CHARS: usize = 12_000;
// Fuzzy-matching caps, divisors and per-field weights.
// NOTE(review): not referenced in the part of the file reviewed here; meanings are
// inferred from the names only — confirm against the fuzzy scoring code.
const FUZZY_NEIGHBOR_CONTEXT_CAP: u32 = 220;
const FUZZY_NO_PRIMARY_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_NEIGHBOR_CONTEXT_DIVISOR: u32 = 3;
const FUZZY_DESCRIPTION_WEIGHT: u32 = 2;
const FUZZY_FACT_WEIGHT: u32 = 2;
const FUZZY_NOTE_BODY_WEIGHT: u32 = 1;
const FUZZY_NOTE_TAG_WEIGHT: u32 = 2;
// BM25 lexical boosts, per-field weights and proximity window.
// NOTE(review): not referenced in the part of the file reviewed here — confirm usage.
const BM25_PHRASE_MATCH_BOOST: i64 = 120;
const BM25_PROXIMITY_MATCH_BOOST: i64 = 80;
const BM25_TOKEN_MATCH_BOOST: i64 = 45;
const BM25_ID_WEIGHT: usize = 5;
const BM25_NAME_WEIGHT: usize = 4;
const BM25_ALIAS_WEIGHT: usize = 4;
const BM25_DESCRIPTION_WEIGHT: usize = 2;
const BM25_FACT_WEIGHT: usize = 2;
const BM25_NOTE_BODY_WEIGHT: usize = 1;
const BM25_NOTE_TAG_WEIGHT: usize = 1;
const BM25_NEIGHBOR_WEIGHT: usize = 1;
const BM25_SELF_CONTEXT_WEIGHT: f64 = 3.0;
const BM25_NEIGHBOR_CONTEXT_WEIGHT: f64 = 1.0;
const BM25_PROXIMITY_WINDOW_TOKENS: usize = 6;
// Fact-volume, importance and score-metadata limits.
// NOTE(review): not referenced in the part of the file reviewed here — confirm usage.
const FACT_VOLUME_BASE_CHARS: f64 = 500.0;
const FACT_VOLUME_MIN_FACTOR: f64 = 0.35;
const IMPORTANCE_NEUTRAL: f64 = 0.5;
const IMPORTANCE_MAX_ABS_BOOST: f64 = 66.0;
const SCORE_META_MAX_RATIO: f64 = 0.35;
const SCORE_META_MIN_CAP: i64 = 30;
const SCORE_META_MAX_CAP: i64 = 240;
43
/// Matching strategy requested for a find query.
///
/// The value is passed through unchanged by the public `find_*` / `render_find*`
/// entry points to the matching code.
/// NOTE(review): how each variant is interpreted is decided outside the part of the
/// file reviewed here — confirm against the matcher.
#[derive(Debug, Clone, Copy)]
pub enum FindMode {
    Fuzzy,
    Bm25,
    Hybrid,
}
50
/// Per-signal weights used to tune find scoring.
///
/// Values produced by [`FindTune::parse`] are always finite-or-clamped numbers in
/// `0.0..=1.0`.
/// NOTE(review): how the three weights are combined is decided outside the part of
/// the file reviewed here.
#[derive(Debug, Clone, Copy)]
pub struct FindTune {
    pub bm25: f64,
    pub fuzzy: f64,
    pub vector: f64,
}

impl FindTune {
    /// Parses a comma-separated list of `key=value` pairs (`bm25`, `fuzzy`, `vector`).
    ///
    /// Keys that are not mentioned keep their [`Default`] value. Whitespace around
    /// keys and values is ignored and every weight is clamped into `0.0..=1.0`.
    ///
    /// Returns `None` when a part has no `=`, the value is not a number, the value
    /// is NaN, or the key is unknown.
    pub fn parse(raw: &str) -> Option<Self> {
        let mut tune = Self::default();
        for part in raw.split(',') {
            let (key, value) = part.split_once('=')?;
            let value = value.trim().parse::<f64>().ok()?;
            // `"nan"` parses successfully and `f64::clamp` hands NaN back unchanged,
            // so without this guard a NaN weight would escape the 0..=1 range.
            if value.is_nan() {
                return None;
            }
            match key.trim() {
                "bm25" => tune.bm25 = value,
                "fuzzy" => tune.fuzzy = value,
                "vector" => tune.vector = value,
                _ => return None,
            }
        }
        Some(tune.clamped())
    }

    /// Returns a copy with every weight clamped into `0.0..=1.0`.
    fn clamped(self) -> Self {
        Self {
            bm25: self.bm25.clamp(0.0, 1.0),
            fuzzy: self.fuzzy.clamp(0.0, 1.0),
            vector: self.vector.clamp(0.0, 1.0),
        }
    }
}

impl Default for FindTune {
    /// Default weights: `bm25 = 0.55`, `fuzzy = 0.35`, `vector = 0.10`.
    fn default() -> Self {
        Self {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 0.10,
        }
    }
}
92
/// A matched node paired with its final score and the components behind that score.
///
/// Holds a borrowed node; the owned, public counterpart is `ScoredNodeResult`.
#[derive(Clone, Copy)]
struct ScoredNode<'a> {
    // Final score; rendered as `score: N` in find output.
    score: i64,
    // The matched node.
    node: &'a Node,
    // Score components; printed by `render_score_debug_line` when score debugging is on.
    breakdown: ScoreBreakdown,
}
99
/// Components of a node's score.
///
/// Every field is printed verbatim by `render_score_debug_line` and copied
/// one-to-one into `ScoreBreakdownResult` for public callers.
/// NOTE(review): how the components are computed and combined is outside the part
/// of the file reviewed here.
#[derive(Debug, Clone, Copy)]
struct ScoreBreakdown {
    raw_relevance: f64,
    normalized_relevance: i64,
    lexical_boost: i64,
    feedback_boost: i64,
    importance_boost: i64,
    authority_raw: i64,
    authority_applied: i64,
    authority_cap: i64,
}
111
/// A node together with a raw relevance value and a lexical boost.
///
/// NOTE(review): not constructed or consumed in the part of the file reviewed here;
/// presumably an intermediate scoring result before normalisation — confirm.
struct RawCandidate<'a> {
    node: &'a Node,
    raw_relevance: f64,
    lexical_boost: i64,
}
117
118struct FindQueryContext<'a> {
119 notes_by_node: HashMap<&'a str, Vec<&'a Note>>,
120 neighbors_by_node: HashMap<&'a str, Vec<&'a Node>>,
121}
122
123impl<'a> FindQueryContext<'a> {
124 fn build(graph: &'a GraphFile) -> Self {
125 let node_by_id: HashMap<&'a str, &'a Node> = graph
126 .nodes
127 .iter()
128 .map(|node| (node.id.as_str(), node))
129 .collect();
130
131 let mut notes_by_node: HashMap<&'a str, Vec<&'a Note>> = HashMap::new();
132 for note in &graph.notes {
133 notes_by_node
134 .entry(note.node_id.as_str())
135 .or_default()
136 .push(note);
137 }
138
139 let mut neighbors_by_node: HashMap<&'a str, Vec<&'a Node>> = HashMap::new();
140 for edge in &graph.edges {
141 if let (Some(source), Some(target)) = (
142 node_by_id.get(edge.source_id.as_str()),
143 node_by_id.get(edge.target_id.as_str()),
144 ) {
145 neighbors_by_node
146 .entry(source.id.as_str())
147 .or_default()
148 .push(*target);
149 neighbors_by_node
150 .entry(target.id.as_str())
151 .or_default()
152 .push(*source);
153 }
154 }
155
156 for neighbors in neighbors_by_node.values_mut() {
157 neighbors.sort_by(|left, right| left.id.cmp(&right.id));
158 neighbors.dedup_by(|left, right| left.id == right.id);
159 }
160
161 Self {
162 notes_by_node,
163 neighbors_by_node,
164 }
165 }
166
167 fn notes_for(&self, node_id: &str) -> &[&'a Note] {
168 self.notes_by_node
169 .get(node_id)
170 .map(Vec::as_slice)
171 .unwrap_or(&[])
172 }
173
174 fn neighbors_for(&self, node_id: &str) -> &[&'a Node] {
175 self.neighbors_by_node
176 .get(node_id)
177 .map(Vec::as_slice)
178 .unwrap_or(&[])
179 }
180}
181
/// Public, owned copy of a node's score components.
///
/// Filled field-by-field from the internal `ScoreBreakdown` in
/// `find_scored_nodes_and_total_with_index_tuned`.
#[derive(Debug, Clone)]
pub struct ScoreBreakdownResult {
    pub raw_relevance: f64,
    pub normalized_relevance: i64,
    pub lexical_boost: i64,
    pub feedback_boost: i64,
    pub importance_boost: i64,
    pub authority_raw: i64,
    pub authority_applied: i64,
    pub authority_cap: i64,
}
193
/// Public, owned find result: a cloned node with its score and score components.
#[derive(Debug, Clone)]
pub struct ScoredNodeResult {
    /// Final score of the node for the query.
    pub score: i64,
    /// Clone of the matched node.
    pub node: Node,
    /// Components behind `score`.
    pub breakdown: ScoreBreakdownResult,
}
200
201pub fn render_find(
202 graph: &GraphFile,
203 queries: &[String],
204 limit: usize,
205 include_features: bool,
206 include_metadata: bool,
207 mode: FindMode,
208 full: bool,
209) -> String {
210 render_find_with_index(
211 graph,
212 queries,
213 limit,
214 include_features,
215 include_metadata,
216 mode,
217 full,
218 false,
219 None,
220 )
221}
222
223pub fn render_find_with_index(
224 graph: &GraphFile,
225 queries: &[String],
226 limit: usize,
227 include_features: bool,
228 include_metadata: bool,
229 mode: FindMode,
230 full: bool,
231 debug_score: bool,
232 index: Option<&Bm25Index>,
233) -> String {
234 render_find_with_index_tuned(
235 graph,
236 queries,
237 limit,
238 include_features,
239 include_metadata,
240 mode,
241 full,
242 debug_score,
243 index,
244 None,
245 )
246}
247
248pub fn render_find_with_index_tuned(
249 graph: &GraphFile,
250 queries: &[String],
251 limit: usize,
252 include_features: bool,
253 include_metadata: bool,
254 mode: FindMode,
255 full: bool,
256 debug_score: bool,
257 index: Option<&Bm25Index>,
258 tune: Option<&FindTune>,
259) -> String {
260 let mut sections = Vec::new();
261 for query in queries {
262 let matches = find_all_matches_with_index(
263 graph,
264 query,
265 include_features,
266 include_metadata,
267 mode,
268 index,
269 tune,
270 );
271 let total = matches.len();
272 let visible: Vec<_> = matches.into_iter().take(limit).collect();
273 let shown = visible.len();
274 let mut lines = vec![render_result_header(query, shown, total)];
275 for scored in visible {
276 lines.push(render_scored_node_block(
277 graph,
278 &scored,
279 full,
280 debug_score,
281 Some(query.as_str()),
282 ));
283 }
284 push_limit_omission_line(&mut lines, shown, total);
285 sections.push(lines.join("\n"));
286 }
287 format!("{}\n", sections.join("\n\n"))
288}
289
290pub fn find_nodes(
291 graph: &GraphFile,
292 query: &str,
293 limit: usize,
294 include_features: bool,
295 include_metadata: bool,
296 mode: FindMode,
297) -> Vec<Node> {
298 find_matches_with_index(
299 graph,
300 query,
301 limit,
302 include_features,
303 include_metadata,
304 mode,
305 None,
306 None,
307 )
308 .into_iter()
309 .map(|item| item.node.clone())
310 .collect()
311}
312
313pub fn find_nodes_with_index(
314 graph: &GraphFile,
315 query: &str,
316 limit: usize,
317 include_features: bool,
318 include_metadata: bool,
319 mode: FindMode,
320 index: Option<&Bm25Index>,
321) -> Vec<Node> {
322 find_matches_with_index(
323 graph,
324 query,
325 limit,
326 include_features,
327 include_metadata,
328 mode,
329 index,
330 None,
331 )
332 .into_iter()
333 .map(|item| item.node.clone())
334 .collect()
335}
336
337pub fn find_nodes_with_index_tuned(
338 graph: &GraphFile,
339 query: &str,
340 limit: usize,
341 include_features: bool,
342 include_metadata: bool,
343 mode: FindMode,
344 index: Option<&Bm25Index>,
345 tune: Option<&FindTune>,
346) -> Vec<Node> {
347 find_matches_with_index(
348 graph,
349 query,
350 limit,
351 include_features,
352 include_metadata,
353 mode,
354 index,
355 tune,
356 )
357 .into_iter()
358 .map(|item| item.node.clone())
359 .collect()
360}
361
362pub fn find_nodes_and_total_with_index(
363 graph: &GraphFile,
364 query: &str,
365 limit: usize,
366 include_features: bool,
367 include_metadata: bool,
368 mode: FindMode,
369 index: Option<&Bm25Index>,
370) -> (usize, Vec<Node>) {
371 let matches = find_all_matches_with_index(
372 graph,
373 query,
374 include_features,
375 include_metadata,
376 mode,
377 index,
378 None,
379 );
380 let total = matches.len();
381 let nodes = matches
382 .into_iter()
383 .take(limit)
384 .map(|item| item.node.clone())
385 .collect();
386 (total, nodes)
387}
388
389pub fn find_scored_nodes_and_total_with_index(
390 graph: &GraphFile,
391 query: &str,
392 limit: usize,
393 include_features: bool,
394 include_metadata: bool,
395 mode: FindMode,
396 index: Option<&Bm25Index>,
397) -> (usize, Vec<ScoredNodeResult>) {
398 find_scored_nodes_and_total_with_index_tuned(
399 graph,
400 query,
401 limit,
402 include_features,
403 include_metadata,
404 mode,
405 index,
406 None,
407 )
408}
409
410pub fn find_scored_nodes_and_total_with_index_tuned(
411 graph: &GraphFile,
412 query: &str,
413 limit: usize,
414 include_features: bool,
415 include_metadata: bool,
416 mode: FindMode,
417 index: Option<&Bm25Index>,
418 tune: Option<&FindTune>,
419) -> (usize, Vec<ScoredNodeResult>) {
420 let matches = find_all_matches_with_index(
421 graph,
422 query,
423 include_features,
424 include_metadata,
425 mode,
426 index,
427 tune,
428 );
429 let total = matches.len();
430 let nodes = matches
431 .into_iter()
432 .take(limit)
433 .map(|item| ScoredNodeResult {
434 score: item.score,
435 node: item.node.clone(),
436 breakdown: ScoreBreakdownResult {
437 raw_relevance: item.breakdown.raw_relevance,
438 normalized_relevance: item.breakdown.normalized_relevance,
439 lexical_boost: item.breakdown.lexical_boost,
440 feedback_boost: item.breakdown.feedback_boost,
441 importance_boost: item.breakdown.importance_boost,
442 authority_raw: item.breakdown.authority_raw,
443 authority_applied: item.breakdown.authority_applied,
444 authority_cap: item.breakdown.authority_cap,
445 },
446 })
447 .collect();
448 (total, nodes)
449}
450
451pub fn count_find_results(
452 graph: &GraphFile,
453 queries: &[String],
454 limit: usize,
455 include_features: bool,
456 include_metadata: bool,
457 mode: FindMode,
458) -> usize {
459 count_find_results_with_index(
460 graph,
461 queries,
462 limit,
463 include_features,
464 include_metadata,
465 mode,
466 None,
467 )
468}
469
470pub fn count_find_results_with_index(
471 graph: &GraphFile,
472 queries: &[String],
473 _limit: usize,
474 include_features: bool,
475 include_metadata: bool,
476 mode: FindMode,
477 index: Option<&Bm25Index>,
478) -> usize {
479 let mut total = 0;
480 for query in queries {
481 total += find_all_matches_with_index(
482 graph,
483 query,
484 include_features,
485 include_metadata,
486 mode,
487 index,
488 None,
489 )
490 .len();
491 }
492 total
493}
494
495pub fn render_node(graph: &GraphFile, node: &Node, full: bool) -> String {
496 format!("{}\n", render_node_block(graph, node, full))
497}
498
499pub fn render_node_adaptive(graph: &GraphFile, node: &Node, target_chars: Option<usize>) -> String {
500 let target = clamp_target_chars(target_chars);
501 let full = format!("{}\n", render_node_block(graph, node, true));
502 if fits_target_chars(&full, target) {
503 return full;
504 }
505 let mut candidates = Vec::new();
506 for (depth, detail, edge_cap) in [
507 (0usize, DetailLevel::Rich, 8usize),
508 (1usize, DetailLevel::Rich, 8usize),
509 (2usize, DetailLevel::Rich, 6usize),
510 (2usize, DetailLevel::Compact, 6usize),
511 (2usize, DetailLevel::Minimal, 2usize),
512 ] {
513 let rendered = render_single_node_candidate(graph, node, depth, detail, edge_cap);
514 candidates.push(Candidate {
515 rendered,
516 depth,
517 detail,
518 shown_nodes: 1 + depth,
519 });
520 }
521 pick_best_candidate(candidates, target)
522}
523
524pub fn render_find_adaptive_with_index(
525 graph: &GraphFile,
526 queries: &[String],
527 limit: usize,
528 include_features: bool,
529 include_metadata: bool,
530 mode: FindMode,
531 target_chars: Option<usize>,
532 debug_score: bool,
533 index: Option<&Bm25Index>,
534) -> String {
535 render_find_adaptive_with_index_tuned(
536 graph,
537 queries,
538 limit,
539 include_features,
540 include_metadata,
541 mode,
542 target_chars,
543 debug_score,
544 index,
545 None,
546 )
547}
548
549pub fn render_find_adaptive_with_index_tuned(
550 graph: &GraphFile,
551 queries: &[String],
552 limit: usize,
553 include_features: bool,
554 include_metadata: bool,
555 mode: FindMode,
556 target_chars: Option<usize>,
557 debug_score: bool,
558 index: Option<&Bm25Index>,
559 tune: Option<&FindTune>,
560) -> String {
561 let target = clamp_target_chars(target_chars);
562 let mut sections = Vec::new();
563 for query in queries {
564 let matches = find_all_matches_with_index(
565 graph,
566 query,
567 include_features,
568 include_metadata,
569 mode,
570 index,
571 tune,
572 );
573 let total = matches.len();
574 let visible: Vec<_> = matches.into_iter().take(limit).collect();
575 let section = if visible.len() == 1 {
576 render_single_result_section(graph, query, &visible[0], total, target, debug_score)
577 } else {
578 render_multi_result_section(graph, query, &visible, total, target, debug_score)
579 };
580 sections.push(section);
581 }
582 format!("{}\n", sections.join("\n\n"))
583}
584
/// How much of a node is printed in adaptive output.
///
/// As rendered by `render_node_identity_lines`:
/// - `Rich`: header, aliases, full description and up to three key facts.
/// - `Compact`: header, description (rendered with a limit of 140) and the first key fact.
/// - `Minimal`: header line only.
#[derive(Clone, Copy)]
enum DetailLevel {
    Rich,
    Compact,
    Minimal,
}
591
/// One rendered variant competing for the character budget in `pick_best_candidate`.
struct Candidate {
    // The rendered text whose character count is compared against the target.
    rendered: String,
    // Neighbour depth used for this variant; contributes to its utility.
    depth: usize,
    // Detail level used for this variant; contributes to its utility.
    detail: DetailLevel,
    // Node count credited to this variant; contributes to its utility.
    shown_nodes: usize,
}
598
599impl DetailLevel {
600 fn utility_bonus(self) -> usize {
601 match self {
602 DetailLevel::Rich => 20,
603 DetailLevel::Compact => 10,
604 DetailLevel::Minimal => 0,
605 }
606 }
607}
608
609fn clamp_target_chars(target_chars: Option<usize>) -> usize {
610 target_chars
611 .unwrap_or(DEFAULT_TARGET_CHARS)
612 .clamp(MIN_TARGET_CHARS, MAX_TARGET_CHARS)
613}
614
615fn render_single_result_section(
616 graph: &GraphFile,
617 query: &str,
618 node: &ScoredNode<'_>,
619 total_available: usize,
620 target: usize,
621 debug_score: bool,
622) -> String {
623 let header = render_result_header(query, 1, total_available);
624 let full = render_single_result_candidate(
625 graph,
626 query,
627 &header,
628 node,
629 total_available,
630 0,
631 DetailLevel::Rich,
632 8,
633 true,
634 debug_score,
635 );
636 if fits_target_chars(&full, target) {
637 return full.trim_end().to_owned();
638 }
639 let mut candidates = Vec::new();
640 for (depth, detail, edge_cap) in [
641 (0usize, DetailLevel::Rich, 8usize),
642 (1usize, DetailLevel::Rich, 8usize),
643 (2usize, DetailLevel::Rich, 6usize),
644 (2usize, DetailLevel::Compact, 6usize),
645 (2usize, DetailLevel::Minimal, 2usize),
646 ] {
647 candidates.push(Candidate {
648 rendered: render_single_result_candidate(
649 graph,
650 query,
651 &header,
652 node,
653 total_available,
654 depth,
655 detail,
656 edge_cap,
657 false,
658 debug_score,
659 ),
660 depth,
661 detail,
662 shown_nodes: 1 + depth,
663 });
664 }
665 pick_best_candidate(candidates, target)
666 .trim_end()
667 .to_owned()
668}
669
670fn render_multi_result_section(
671 graph: &GraphFile,
672 query: &str,
673 nodes: &[ScoredNode<'_>],
674 total_available: usize,
675 target: usize,
676 debug_score: bool,
677) -> String {
678 let visible_total = nodes.len();
679 let full = render_full_result_section(graph, query, nodes, total_available, debug_score);
680 if fits_target_chars(&full, target) {
681 return full;
682 }
683 let mut candidates = Vec::new();
684 let full_cap = visible_total;
685 let mid_cap = full_cap.min(5);
686 let low_cap = full_cap.min(3);
687
688 for (detail, edge_cap, result_cap, depth) in [
689 (DetailLevel::Rich, 4usize, full_cap.min(4), 0usize),
690 (DetailLevel::Compact, 3usize, full_cap, 0usize),
691 (DetailLevel::Rich, 2usize, mid_cap, 1usize),
692 (DetailLevel::Compact, 1usize, full_cap, 0usize),
693 (DetailLevel::Minimal, 1usize, mid_cap, 0usize),
694 (DetailLevel::Minimal, 0usize, low_cap, 0usize),
695 (DetailLevel::Minimal, 0usize, low_cap.min(2), 1usize),
696 ] {
697 let shown = result_cap.min(nodes.len());
698 let mut lines = vec![render_result_header(query, shown, total_available)];
699 for node in nodes.iter().take(shown) {
700 lines.extend(render_scored_node_candidate_lines(
701 graph,
702 query,
703 node,
704 0,
705 detail,
706 edge_cap,
707 debug_score,
708 ));
709 if depth > 0 {
710 lines.extend(render_neighbor_layers(graph, node.node, depth, detail));
711 }
712 }
713 if visible_total > shown {
714 lines.push(format!("... +{} more nodes omitted", visible_total - shown));
715 }
716 push_limit_omission_line(&mut lines, visible_total, total_available);
717 candidates.push(Candidate {
718 rendered: format!("{}\n", lines.join("\n")),
719 depth,
720 detail,
721 shown_nodes: shown,
722 });
723 }
724
725 pick_best_candidate(candidates, target)
726 .trim_end()
727 .to_owned()
728}
729
730fn pick_best_candidate(candidates: Vec<Candidate>, target: usize) -> String {
731 let lower = (target as f64 * 0.7) as usize;
732 let mut best: Option<(usize, usize, usize, usize, String)> = None;
733
734 for candidate in candidates {
735 let chars = candidate.rendered.chars().count();
736 let overshoot = chars.saturating_sub(target);
737 let undershoot = lower.saturating_sub(chars);
738 let penalty = overshoot.saturating_mul(10).saturating_add(undershoot);
739 let utility = candidate
740 .depth
741 .saturating_mul(100)
742 .saturating_add(candidate.shown_nodes.saturating_mul(5))
743 .saturating_add(candidate.detail.utility_bonus());
744
745 let entry = (
746 penalty,
747 overshoot,
748 usize::MAX - utility,
749 usize::MAX - chars,
750 candidate.rendered,
751 );
752 if best.as_ref().is_none_or(|current| {
753 entry.0 < current.0
754 || (entry.0 == current.0 && entry.1 < current.1)
755 || (entry.0 == current.0 && entry.1 == current.1 && entry.2 < current.2)
756 || (entry.0 == current.0
757 && entry.1 == current.1
758 && entry.2 == current.2
759 && entry.3 < current.3)
760 }) {
761 best = Some(entry);
762 }
763 }
764
765 best.map(|item| item.4).unwrap_or_else(|| "\n".to_owned())
766}
767
768fn render_full_result_section(
769 graph: &GraphFile,
770 query: &str,
771 nodes: &[ScoredNode<'_>],
772 total_available: usize,
773 debug_score: bool,
774) -> String {
775 let mut lines = vec![render_result_header(query, nodes.len(), total_available)];
776 for node in nodes {
777 lines.push(render_scored_node_block(
778 graph,
779 node,
780 true,
781 debug_score,
782 Some(query),
783 ));
784 }
785 push_limit_omission_line(&mut lines, nodes.len(), total_available);
786 lines.join("\n")
787}
788
789fn render_result_header(query: &str, shown: usize, total: usize) -> String {
790 let query = escape_cli_text(query);
791 if shown < total {
792 format!("? {query} ({shown}/{total})")
793 } else {
794 format!("? {query} ({total})")
795 }
796}
797
/// Appends a `... N more nodes omitted by limit` line when `total` exceeds `shown`;
/// otherwise leaves `lines` untouched.
fn push_limit_omission_line(lines: &mut Vec<String>, shown: usize, total: usize) {
    if total <= shown {
        return;
    }
    let omitted = total - shown;
    lines.push(format!("... {omitted} more nodes omitted by limit"));
}
804
/// Returns `true` when `rendered` has at most `target` characters (Unicode scalar
/// values, not bytes).
fn fits_target_chars(rendered: &str, target: usize) -> bool {
    // A character at zero-based index `target` exists exactly when the text is too long.
    rendered.chars().nth(target).is_none()
}
808
809fn render_single_node_candidate(
810 graph: &GraphFile,
811 node: &Node,
812 depth: usize,
813 detail: DetailLevel,
814 edge_cap: usize,
815) -> String {
816 let lines = render_single_node_candidate_lines(graph, node, depth, detail, edge_cap, None);
817 format!("{}\n", lines.join("\n"))
818}
819
820fn render_single_result_candidate(
821 graph: &GraphFile,
822 query: &str,
823 header: &str,
824 node: &ScoredNode<'_>,
825 total_available: usize,
826 depth: usize,
827 detail: DetailLevel,
828 edge_cap: usize,
829 full: bool,
830 debug_score: bool,
831) -> String {
832 let mut lines = vec![header.to_owned()];
833 if full {
834 lines.push(render_scored_node_block(
835 graph,
836 node,
837 true,
838 debug_score,
839 Some(query),
840 ));
841 } else {
842 lines.extend(render_scored_node_candidate_lines(
843 graph,
844 query,
845 node,
846 depth,
847 detail,
848 edge_cap,
849 debug_score,
850 ));
851 }
852 push_limit_omission_line(&mut lines, 1, total_available);
853 format!("{}\n", lines.join("\n"))
854}
855
856fn render_single_node_candidate_lines(
857 graph: &GraphFile,
858 node: &Node,
859 depth: usize,
860 detail: DetailLevel,
861 edge_cap: usize,
862 query: Option<&str>,
863) -> Vec<String> {
864 let mut lines = render_node_lines_with_edges(graph, node, detail, edge_cap, query);
865 if depth > 0 {
866 lines.extend(render_neighbor_layers(graph, node, depth, detail));
867 }
868 lines
869}
870
871fn render_scored_node_candidate_lines(
872 graph: &GraphFile,
873 query: &str,
874 node: &ScoredNode<'_>,
875 depth: usize,
876 detail: DetailLevel,
877 edge_cap: usize,
878 debug_score: bool,
879) -> Vec<String> {
880 let mut lines = vec![format!("score: {}", node.score)];
881 if debug_score {
882 lines.push(render_score_debug_line(node));
883 }
884 lines.extend(render_single_node_candidate_lines(
885 graph,
886 node.node,
887 depth,
888 detail,
889 edge_cap,
890 Some(query),
891 ));
892 lines
893}
894
895fn render_scored_node_block(
896 graph: &GraphFile,
897 node: &ScoredNode<'_>,
898 full: bool,
899 debug_score: bool,
900 query: Option<&str>,
901) -> String {
902 if debug_score {
903 format!(
904 "score: {}\n{}\n{}",
905 node.score,
906 render_score_debug_line(node),
907 render_node_block_with_query(graph, node.node, full, query)
908 )
909 } else {
910 format!(
911 "score: {}\n{}",
912 node.score,
913 render_node_block_with_query(graph, node.node, full, query)
914 )
915 }
916}
917
918fn render_score_debug_line(node: &ScoredNode<'_>) -> String {
919 format!(
920 "score_debug: raw_relevance={:.3} normalized_relevance={} lexical_boost={} feedback_boost={} importance_boost={} authority_raw={} authority_applied={} authority_cap={}",
921 node.breakdown.raw_relevance,
922 node.breakdown.normalized_relevance,
923 node.breakdown.lexical_boost,
924 node.breakdown.feedback_boost,
925 node.breakdown.importance_boost,
926 node.breakdown.authority_raw,
927 node.breakdown.authority_applied,
928 node.breakdown.authority_cap,
929 )
930}
931
932fn render_neighbor_layers(
933 graph: &GraphFile,
934 root: &Node,
935 max_depth: usize,
936 detail: DetailLevel,
937) -> Vec<String> {
938 let mut out = Vec::new();
939 let mut seen: HashSet<String> = HashSet::from([root.id.clone()]);
940 let mut queue: VecDeque<(String, usize)> = VecDeque::from([(root.id.clone(), 0usize)]);
941 let mut layers: Vec<Vec<&Node>> = vec![Vec::new(); max_depth + 1];
942
943 while let Some((node_id, depth)) = queue.pop_front() {
944 if depth >= max_depth {
945 continue;
946 }
947 for incident in incident_edges(graph, &node_id) {
948 if seen.insert(incident.related.id.clone()) {
949 let next_depth = depth + 1;
950 if next_depth <= max_depth {
951 layers[next_depth].push(incident.related);
952 queue.push_back((incident.related.id.clone(), next_depth));
953 }
954 }
955 }
956 }
957
958 for depth in 1..=max_depth {
959 if layers[depth].is_empty() {
960 continue;
961 }
962 let cap = match detail {
963 DetailLevel::Rich => 6,
964 DetailLevel::Compact => 4,
965 DetailLevel::Minimal => 3,
966 };
967 let shown = layers[depth].len().min(cap);
968 out.push(format!(
969 "depth {depth}: {shown}/{} neighbors",
970 layers[depth].len()
971 ));
972 for node in layers[depth].iter().take(shown) {
973 out.extend(render_node_identity_lines(node, detail));
974 }
975 if layers[depth].len() > shown {
976 out.push(format!(
977 "... +{} more neighbors omitted",
978 layers[depth].len() - shown
979 ));
980 }
981 }
982
983 out
984}
985
986fn render_node_lines_with_edges(
987 graph: &GraphFile,
988 node: &Node,
989 detail: DetailLevel,
990 edge_cap: usize,
991 query: Option<&str>,
992) -> Vec<String> {
993 let mut lines = render_node_identity_lines(node, detail);
994 lines.extend(render_node_link_lines(graph, node, edge_cap, query));
995 lines
996}
997
998fn render_node_identity_lines(node: &Node, detail: DetailLevel) -> Vec<String> {
999 let mut lines = Vec::new();
1000 let display_name = node_display_name(node);
1001 match detail {
1002 DetailLevel::Rich => {
1003 lines.push(format!(
1004 "# {} | {} [{}]",
1005 node.id,
1006 escape_cli_text(&display_name),
1007 node.r#type
1008 ));
1009 if !node.properties.alias.is_empty() {
1010 lines.push(format!(
1011 "aka: {}",
1012 node.properties
1013 .alias
1014 .iter()
1015 .map(|alias| escape_cli_text(alias))
1016 .collect::<Vec<_>>()
1017 .join(", ")
1018 ));
1019 }
1020 push_description_line(&mut lines, &node.properties.description, None);
1021 let shown_facts = node.properties.key_facts.len().min(3);
1022 for fact in node.properties.key_facts.iter().take(shown_facts) {
1023 lines.push(format!("- {}", escape_cli_text(fact)));
1024 }
1025 let omitted = node.properties.key_facts.len().saturating_sub(shown_facts);
1026 if omitted > 0 {
1027 lines.push(format!("... {omitted} more facts omitted"));
1028 }
1029 }
1030 DetailLevel::Compact => {
1031 lines.push(format!(
1032 "# {} | {} [{}]",
1033 node.id,
1034 escape_cli_text(&display_name),
1035 node.r#type
1036 ));
1037 push_description_line(&mut lines, &node.properties.description, Some(140));
1038 if let Some(fact) = node.properties.key_facts.first() {
1039 lines.push(format!("- {}", escape_cli_text(fact)));
1040 }
1041 }
1042 DetailLevel::Minimal => {
1043 lines.push(format!(
1044 "# {} | {} [{}]",
1045 node.id,
1046 escape_cli_text(&display_name),
1047 node.r#type
1048 ));
1049 }
1050 }
1051 lines
1052}
1053
1054fn node_display_name(node: &Node) -> String {
1055 if !node.name.trim().is_empty() {
1056 return node.name.clone();
1057 }
1058 let raw = node
1059 .id
1060 .split_once(':')
1061 .map(|(_, suffix)| {
1062 suffix
1063 .rsplit_once(':')
1064 .map(|(name, _)| name)
1065 .unwrap_or(suffix)
1066 })
1067 .unwrap_or(node.id.as_str())
1068 .to_owned();
1069 unescape_generated_name(&raw)
1070}
1071
/// Reverses the `~` escaping used in generated names: `~~` becomes `~` and `~c`
/// becomes `:`. A `~` followed by any other character, or at the very end, is
/// kept literally.
fn unescape_generated_name(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    let mut chars = value.chars();
    while let Some(ch) = chars.next() {
        if ch == '~' {
            // Consume the character after the tilde to decide what the pair means.
            match chars.next() {
                Some('c') => out.push(':'),
                Some('~') | None => out.push('~'),
                Some(other) => {
                    out.push('~');
                    out.push(other);
                }
            }
        } else {
            out.push(ch);
        }
    }
    out
}
1092
1093fn render_node_link_lines(
1094 graph: &GraphFile,
1095 node: &Node,
1096 edge_cap: usize,
1097 query: Option<&str>,
1098) -> Vec<String> {
1099 let mut incident = incident_edges(graph, &node.id);
1100 if let Some(query) = query {
1101 let query_terms = text_norm::expand_query_terms(query);
1102 if !query_terms.is_empty() {
1103 incident.sort_by(|left, right| {
1104 let right_relevance = incident_edge_query_relevance(right, &query_terms);
1105 let left_relevance = incident_edge_query_relevance(left, &query_terms);
1106 right_relevance
1107 .cmp(&left_relevance)
1108 .then_with(|| incident_edge_default_cmp(left, right))
1109 });
1110 }
1111 }
1112 if incident.is_empty() {
1113 return Vec::new();
1114 }
1115
1116 let mut lines = Vec::new();
1117 if incident.len() > 12 {
1118 lines.push(format!("links: {} total", incident.len()));
1119 let (out_summary, in_summary) = summarize_relations(&incident);
1120 if !out_summary.is_empty() {
1121 lines.push(format!("out: {out_summary}"));
1122 }
1123 if !in_summary.is_empty() {
1124 lines.push(format!("in: {in_summary}"));
1125 }
1126 }
1127
1128 let shown = incident.len().min(edge_cap);
1129 for edge in incident.into_iter().take(shown) {
1130 let prefix = if edge.incoming { "<-" } else { "->" };
1131 lines.extend(render_edge_lines(prefix, edge.edge, edge.related, false));
1132 }
1133 if edge_cap > 0 && incident_count(graph, &node.id) > shown {
1134 lines.push(format!(
1135 "... {} more links omitted",
1136 incident_count(graph, &node.id) - shown
1137 ));
1138 }
1139 lines
1140}
1141
1142fn incident_count(graph: &GraphFile, node_id: &str) -> usize {
1143 graph
1144 .edges
1145 .iter()
1146 .filter(|edge| edge.source_id == node_id || edge.target_id == node_id)
1147 .count()
1148}
1149
/// An edge seen from one of its endpoints (the "focal" node).
struct IncidentEdge<'a> {
    // The edge itself.
    edge: &'a Edge,
    // The node at the other end of the edge.
    related: &'a Node,
    // `true` when the focal node is the edge's target, `false` when it is the source.
    incoming: bool,
}
1155
1156fn incident_edges<'a>(graph: &'a GraphFile, node_id: &str) -> Vec<IncidentEdge<'a>> {
1157 let mut edges = Vec::new();
1158 for edge in &graph.edges {
1159 if edge.source_id == node_id {
1160 if let Some(related) = graph.node_by_id(&edge.target_id) {
1161 edges.push(IncidentEdge {
1162 edge,
1163 related,
1164 incoming: false,
1165 });
1166 }
1167 } else if edge.target_id == node_id {
1168 if let Some(related) = graph.node_by_id(&edge.source_id) {
1169 edges.push(IncidentEdge {
1170 edge,
1171 related,
1172 incoming: true,
1173 });
1174 }
1175 }
1176 }
1177 edges.sort_by(incident_edge_default_cmp);
1178 edges
1179}
1180
1181fn incident_edge_default_cmp(
1182 left: &IncidentEdge<'_>,
1183 right: &IncidentEdge<'_>,
1184) -> std::cmp::Ordering {
1185 right
1186 .related
1187 .properties
1188 .importance
1189 .partial_cmp(&left.related.properties.importance)
1190 .unwrap_or(std::cmp::Ordering::Equal)
1191 .then_with(|| left.edge.relation.cmp(&right.edge.relation))
1192 .then_with(|| left.related.id.cmp(&right.related.id))
1193}
1194
1195fn incident_edge_query_relevance(edge: &IncidentEdge<'_>, query_terms: &[String]) -> i64 {
1196 if query_terms.is_empty() {
1197 return 0;
1198 }
1199 let related = edge.related;
1200 let mut score = 0;
1201 score += query_overlap_score(&related.id, query_terms, 6);
1202 score += query_overlap_score(&related.name, query_terms, 5);
1203 score += query_overlap_score(&related.properties.description, query_terms, 2);
1204 score += query_overlap_score(&edge.edge.relation, query_terms, 2);
1205 score += query_overlap_score(&edge.edge.properties.detail, query_terms, 2);
1206 for alias in &related.properties.alias {
1207 score += query_overlap_score(alias, query_terms, 4);
1208 }
1209 score
1210}
1211
1212fn query_overlap_score(value: &str, query_terms: &[String], weight: i64) -> i64 {
1213 if value.is_empty() || query_terms.is_empty() {
1214 return 0;
1215 }
1216 let value_terms: HashSet<String> = tokenize(value).into_iter().collect();
1217 if value_terms.is_empty() {
1218 return 0;
1219 }
1220 let matches = query_terms
1221 .iter()
1222 .filter(|term| value_terms.contains(term.as_str()))
1223 .count() as i64;
1224 matches * weight
1225}
1226
1227fn summarize_relations(edges: &[IncidentEdge<'_>]) -> (String, String) {
1228 let mut out: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1229 let mut incoming: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1230
1231 for edge in edges {
1232 let bucket = if edge.incoming {
1233 &mut incoming
1234 } else {
1235 &mut out
1236 };
1237 *bucket.entry(edge.edge.relation.clone()).or_insert(0) += 1;
1238 }
1239
1240 (join_relation_counts(&out), join_relation_counts(&incoming))
1241}
1242
/// Formats the first three entries of `counts` (in key order) as
/// `relation xN`, separated by `, `.
fn join_relation_counts(counts: &std::collections::BTreeMap<String, usize>) -> String {
    let mut joined = String::new();
    for (position, (relation, count)) in counts.iter().take(3).enumerate() {
        if position > 0 {
            joined.push_str(", ");
        }
        joined.push_str(&format!("{relation} x{count}"));
    }
    joined
}
1251
1252fn render_node_block(graph: &GraphFile, node: &Node, full: bool) -> String {
1253 render_node_block_with_query(graph, node, full, None)
1254}
1255
1256fn render_node_block_with_query(
1257 graph: &GraphFile,
1258 node: &Node,
1259 full: bool,
1260 query: Option<&str>,
1261) -> String {
1262 let mut lines = Vec::new();
1263 let display_name = node_display_name(node);
1264 let generated = crate::validate::is_generated_node_type(&node.r#type);
1265 lines.push(format!(
1266 "# {} | {} [{}]",
1267 node.id,
1268 escape_cli_text(&display_name),
1269 node.r#type
1270 ));
1271
1272 if !node.properties.alias.is_empty() {
1273 lines.push(format!(
1274 "aka: {}",
1275 node.properties
1276 .alias
1277 .iter()
1278 .map(|alias| escape_cli_text(alias))
1279 .collect::<Vec<_>>()
1280 .join(", ")
1281 ));
1282 }
1283 push_description_line(
1284 &mut lines,
1285 &node.properties.description,
1286 if full { None } else { Some(200) },
1287 );
1288 if full && !generated {
1289 if !node.properties.domain_area.is_empty() {
1290 lines.push(format!(
1291 "domain_area: {}",
1292 escape_cli_text(&node.properties.domain_area)
1293 ));
1294 }
1295 if let Some(scan) = node.properties.scan {
1296 lines.push(format!("scan: {scan}"));
1297 }
1298 if let Some(scan_ignore_unknown) = node.properties.scan_ignore_unknown {
1299 lines.push(format!("scan_ignore_unknown: {scan_ignore_unknown}"));
1300 }
1301 if !node.properties.provenance.is_empty() {
1302 lines.push(format!(
1303 "provenance: {}",
1304 escape_cli_text(&node.properties.provenance)
1305 ));
1306 }
1307 if let Some(confidence) = node.properties.confidence {
1308 lines.push(format!("confidence: {confidence}"));
1309 }
1310 lines.push(format!("importance: {}", node.properties.importance));
1311 if !node.properties.created_at.is_empty() {
1312 lines.push(format!("created_at: {}", node.properties.created_at));
1313 }
1314 }
1315
1316 let facts_to_show = if full {
1317 node.properties.key_facts.len()
1318 } else {
1319 node.properties.key_facts.len().min(2)
1320 };
1321 for fact in node.properties.key_facts.iter().take(facts_to_show) {
1322 lines.push(format!("- {}", escape_cli_text(fact)));
1323 }
1324 let omitted = node
1325 .properties
1326 .key_facts
1327 .len()
1328 .saturating_sub(facts_to_show);
1329 if omitted > 0 {
1330 lines.push(format!("... {omitted} more facts omitted"));
1331 }
1332
1333 if full && !generated {
1334 if !node.source_files.is_empty() {
1335 lines.push(format!(
1336 "sources: {}",
1337 node.source_files
1338 .iter()
1339 .map(|source| escape_cli_text(source))
1340 .collect::<Vec<_>>()
1341 .join(", ")
1342 ));
1343 }
1344 push_feedback_lines(
1345 &mut lines,
1346 node.properties.feedback_score,
1347 node.properties.feedback_count,
1348 node.properties.feedback_last_ts_ms,
1349 None,
1350 );
1351 }
1352
1353 let attached_notes: Vec<_> = graph
1354 .notes
1355 .iter()
1356 .filter(|note| note.node_id == node.id)
1357 .collect();
1358 if full && !attached_notes.is_empty() {
1359 lines.push(format!("notes: {}", attached_notes.len()));
1360 for note in attached_notes {
1361 lines.extend(render_attached_note_lines(note));
1362 }
1363 }
1364
1365 for edge in outgoing_edges(graph, &node.id, full, query) {
1366 if let Some(target) = graph.node_by_id(&edge.target_id) {
1367 lines.extend(render_edge_lines("->", edge, target, full));
1368 }
1369 }
1370 for edge in incoming_edges(graph, &node.id, full, query) {
1371 if let Some(source) = graph.node_by_id(&edge.source_id) {
1372 lines.extend(render_edge_lines("<-", edge, source, full));
1373 }
1374 }
1375
1376 lines.join("\n")
1377}
1378
1379fn outgoing_edges<'a>(
1380 graph: &'a GraphFile,
1381 node_id: &str,
1382 full: bool,
1383 query: Option<&str>,
1384) -> Vec<&'a Edge> {
1385 let mut edges: Vec<&Edge> = graph
1386 .edges
1387 .iter()
1388 .filter(|edge| edge.source_id == node_id)
1389 .collect();
1390 if let Some(query) = query {
1391 let query_terms = text_norm::expand_query_terms(query);
1392 if !query_terms.is_empty() {
1393 edges.sort_by(|left, right| {
1394 let right_score = directed_edge_query_relevance(graph, right, false, &query_terms);
1395 let left_score = directed_edge_query_relevance(graph, left, false, &query_terms);
1396 right_score
1397 .cmp(&left_score)
1398 .then_with(|| left.relation.cmp(&right.relation))
1399 .then_with(|| left.target_id.cmp(&right.target_id))
1400 });
1401 } else {
1402 edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1403 }
1404 } else {
1405 edges.sort_by_key(|edge| (&edge.relation, &edge.target_id));
1406 }
1407 if !full {
1408 edges.truncate(3);
1409 }
1410 edges
1411}
1412
1413fn incoming_edges<'a>(
1414 graph: &'a GraphFile,
1415 node_id: &str,
1416 full: bool,
1417 query: Option<&str>,
1418) -> Vec<&'a Edge> {
1419 let mut edges: Vec<&Edge> = graph
1420 .edges
1421 .iter()
1422 .filter(|edge| edge.target_id == node_id)
1423 .collect();
1424 if let Some(query) = query {
1425 let query_terms = text_norm::expand_query_terms(query);
1426 if !query_terms.is_empty() {
1427 edges.sort_by(|left, right| {
1428 let right_score = directed_edge_query_relevance(graph, right, true, &query_terms);
1429 let left_score = directed_edge_query_relevance(graph, left, true, &query_terms);
1430 right_score
1431 .cmp(&left_score)
1432 .then_with(|| left.relation.cmp(&right.relation))
1433 .then_with(|| left.source_id.cmp(&right.source_id))
1434 });
1435 } else {
1436 edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1437 }
1438 } else {
1439 edges.sort_by_key(|edge| (&edge.relation, &edge.source_id));
1440 }
1441 if !full {
1442 edges.truncate(3);
1443 }
1444 edges
1445}
1446
1447fn directed_edge_query_relevance(
1448 graph: &GraphFile,
1449 edge: &Edge,
1450 incoming: bool,
1451 query_terms: &[String],
1452) -> i64 {
1453 let related = if incoming {
1454 graph.node_by_id(&edge.source_id)
1455 } else {
1456 graph.node_by_id(&edge.target_id)
1457 };
1458 let mut score = query_overlap_score(&edge.relation, query_terms, 2)
1459 + query_overlap_score(&edge.properties.detail, query_terms, 2);
1460 if let Some(node) = related {
1461 score += query_overlap_score(&node.id, query_terms, 6);
1462 score += query_overlap_score(&node.name, query_terms, 5);
1463 score += query_overlap_score(&node.properties.description, query_terms, 2);
1464 for alias in &node.properties.alias {
1465 score += query_overlap_score(alias, query_terms, 4);
1466 }
1467 }
1468 score
1469}
1470
1471fn render_edge_lines(prefix: &str, edge: &Edge, related: &Node, full: bool) -> Vec<String> {
1472 let (arrow, relation) = if edge.relation.starts_with("NOT_") {
1473 (
1474 format!("{prefix}!"),
1475 edge.relation.trim_start_matches("NOT_"),
1476 )
1477 } else {
1478 (prefix.to_owned(), edge.relation.as_str())
1479 };
1480
1481 let mut line = format!(
1482 "{arrow} {relation} | {} | {}",
1483 related.id,
1484 escape_cli_text(&related.name)
1485 );
1486 if !edge.properties.detail.is_empty() {
1487 line.push_str(" | ");
1488 let detail = escape_cli_text(&edge.properties.detail);
1489 if full {
1490 line.push_str(&detail);
1491 } else {
1492 line.push_str(&truncate(&detail, 80));
1493 }
1494 }
1495 let mut lines = vec![line];
1496 if full {
1497 push_feedback_lines(
1498 &mut lines,
1499 edge.properties.feedback_score,
1500 edge.properties.feedback_count,
1501 edge.properties.feedback_last_ts_ms,
1502 Some("edge_"),
1503 );
1504 if !edge.properties.valid_from.is_empty() {
1505 lines.push(format!("edge_valid_from: {}", edge.properties.valid_from));
1506 }
1507 if !edge.properties.valid_to.is_empty() {
1508 lines.push(format!("edge_valid_to: {}", edge.properties.valid_to));
1509 }
1510 }
1511 lines
1512}
1513
/// Shortens `value` to at most `max_len` characters (Unicode scalar values).
///
/// Values that already fit are returned unchanged. Longer values are cut and end in
/// `...`, with the ellipsis counted against `max_len`. When `max_len` is below three
/// there is no room for the ellipsis, so the value is hard-cut to `max_len` characters
/// instead of returning a result longer than requested.
fn truncate(value: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";
    const ELLIPSIS_LEN: usize = 3;

    // Only look at the first `max_len + 1` characters instead of counting them all.
    if value.chars().nth(max_len).is_none() {
        return value.to_owned();
    }

    if max_len < ELLIPSIS_LEN {
        return value.chars().take(max_len).collect();
    }

    let mut shortened: String = value.chars().take(max_len - ELLIPSIS_LEN).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
1522
/// Escapes backslashes, newlines, carriage returns and tabs as `\\`, `\n`, `\r` and
/// `\t` so that the text stays on a single output line. Other characters are copied
/// unchanged.
fn escape_cli_text(value: &str) -> String {
    value.chars().fold(String::new(), |mut escaped, ch| {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
        escaped
    })
}
1536
1537fn push_description_line(lines: &mut Vec<String>, description: &str, max_len: Option<usize>) {
1538 if description.is_empty() {
1539 return;
1540 }
1541 let escaped = escape_cli_text(description);
1542 let rendered = match max_len {
1543 Some(limit) => truncate(&escaped, limit),
1544 None => escaped,
1545 };
1546 lines.push(format!("desc: {rendered}"));
1547}
1548
/// Appends feedback lines, each key prefixed with `prefix` (empty when `None`).
///
/// `feedback_score` is emitted when non-zero, `feedback_count` when non-zero, and
/// `feedback_last_ts_ms` when a timestamp is present.
fn push_feedback_lines(
    lines: &mut Vec<String>,
    score: f64,
    count: u64,
    last_ts_ms: Option<u64>,
    prefix: Option<&str>,
) {
    let tag = match prefix {
        Some(text) => text,
        None => "",
    };
    if score != 0.0 {
        lines.push(format!("{tag}feedback_score: {score}"));
    }
    if count > 0 {
        lines.push(format!("{tag}feedback_count: {count}"));
    }
    lines.extend(last_ts_ms.map(|ts| format!("{tag}feedback_last_ts_ms: {ts}")));
}
1567
1568fn render_attached_note_lines(note: &crate::graph::Note) -> Vec<String> {
1569 let mut lines = vec![format!("! {}", note.id)];
1570 if !note.body.is_empty() {
1571 lines.push(format!("note_body: {}", escape_cli_text(¬e.body)));
1572 }
1573 if !note.tags.is_empty() {
1574 lines.push(format!(
1575 "note_tags: {}",
1576 note.tags
1577 .iter()
1578 .map(|tag| escape_cli_text(tag))
1579 .collect::<Vec<_>>()
1580 .join(", ")
1581 ));
1582 }
1583 if !note.author.is_empty() {
1584 lines.push(format!("note_author: {}", escape_cli_text(¬e.author)));
1585 }
1586 if !note.created_at.is_empty() {
1587 lines.push(format!("note_created_at: {}", note.created_at));
1588 }
1589 if !note.provenance.is_empty() {
1590 lines.push(format!(
1591 "note_provenance: {}",
1592 escape_cli_text(¬e.provenance)
1593 ));
1594 }
1595 if !note.source_files.is_empty() {
1596 lines.push(format!(
1597 "note_sources: {}",
1598 note.source_files
1599 .iter()
1600 .map(|source| escape_cli_text(source))
1601 .collect::<Vec<_>>()
1602 .join(", ")
1603 ));
1604 }
1605 lines
1606}
1607
1608fn find_matches_with_index<'a>(
1609 graph: &'a GraphFile,
1610 query: &str,
1611 limit: usize,
1612 include_features: bool,
1613 include_metadata: bool,
1614 mode: FindMode,
1615 index: Option<&Bm25Index>,
1616 tune: Option<&FindTune>,
1617) -> Vec<ScoredNode<'a>> {
1618 let mut matches = find_all_matches_with_index(
1619 graph,
1620 query,
1621 include_features,
1622 include_metadata,
1623 mode,
1624 index,
1625 tune,
1626 );
1627 matches.truncate(limit);
1628 matches
1629}
1630
1631fn find_all_matches_with_index<'a>(
1632 graph: &'a GraphFile,
1633 query: &str,
1634 include_features: bool,
1635 include_metadata: bool,
1636 mode: FindMode,
1637 index: Option<&Bm25Index>,
1638 tune: Option<&FindTune>,
1639) -> Vec<ScoredNode<'a>> {
1640 let context = FindQueryContext::build(graph);
1641 let rewritten_query = rewrite_query(query);
1642 let fuzzy_query = if rewritten_query.is_empty() {
1643 query.to_owned()
1644 } else {
1645 rewritten_query
1646 };
1647 let mut scored: Vec<ScoredNode<'a>> = match mode {
1648 FindMode::Fuzzy => {
1649 let pattern = Pattern::parse(&fuzzy_query, CaseMatching::Ignore, Normalization::Smart);
1650 let mut matcher = Matcher::new(Config::DEFAULT);
1651 let candidates = graph
1652 .nodes
1653 .iter()
1654 .filter(|node| node_is_searchable(node, include_features, include_metadata))
1655 .filter_map(|node| {
1656 score_node(&context, node, &fuzzy_query, &pattern, &mut matcher).map(|score| {
1657 RawCandidate {
1658 node,
1659 raw_relevance: score as f64,
1660 lexical_boost: 0,
1661 }
1662 })
1663 })
1664 .collect();
1665 compose_scores(candidates)
1666 }
1667 FindMode::Bm25 => compose_scores(score_bm25_raw(
1668 graph,
1669 &context,
1670 &fuzzy_query,
1671 include_features,
1672 include_metadata,
1673 index,
1674 )),
1675 FindMode::Hybrid => compose_scores(score_hybrid_raw(
1676 graph,
1677 &context,
1678 &fuzzy_query,
1679 include_features,
1680 include_metadata,
1681 index,
1682 tune.copied().unwrap_or_default(),
1683 )),
1684 };
1685
1686 scored.sort_by(|left, right| {
1687 right
1688 .score
1689 .cmp(&left.score)
1690 .then_with(|| left.node.id.cmp(&right.node.id))
1691 });
1692 let mut seen_ids = HashSet::new();
1693 scored.retain(|item| {
1694 let key = crate::validate::normalize_node_id(&item.node.id).to_ascii_lowercase();
1695 seen_ids.insert(key)
1696 });
1697 scored
1698}
1699
1700fn compose_scores<'a>(candidates: Vec<RawCandidate<'a>>) -> Vec<ScoredNode<'a>> {
1701 let max_raw = candidates
1702 .iter()
1703 .map(|candidate| candidate.raw_relevance)
1704 .fold(0.0f64, f64::max);
1705 let max_raw_log = max_raw.ln_1p();
1706
1707 candidates
1708 .into_iter()
1709 .filter_map(|candidate| {
1710 if candidate.raw_relevance <= 0.0 {
1711 return None;
1712 }
1713 let normalized_relevance = if max_raw_log > 0.0 {
1714 ((candidate.raw_relevance.ln_1p() / max_raw_log) * 1000.0).round() as i64
1715 } else {
1716 0
1717 };
1718 let feedback = feedback_boost(candidate.node);
1719 let importance = importance_boost(candidate.node);
1720 let authority_raw = feedback + importance;
1721 let relative_cap =
1722 ((normalized_relevance as f64) * SCORE_META_MAX_RATIO).round() as i64;
1723 let authority_cap = relative_cap.max(SCORE_META_MIN_CAP).min(SCORE_META_MAX_CAP);
1724 let authority_applied = authority_raw.clamp(-authority_cap, authority_cap);
1725 let final_score = normalized_relevance + authority_applied;
1726
1727 Some(ScoredNode {
1728 score: final_score,
1729 node: candidate.node,
1730 breakdown: ScoreBreakdown {
1731 raw_relevance: candidate.raw_relevance,
1732 normalized_relevance,
1733 lexical_boost: candidate.lexical_boost,
1734 feedback_boost: feedback,
1735 importance_boost: importance,
1736 authority_raw,
1737 authority_applied,
1738 authority_cap,
1739 },
1740 })
1741 })
1742 .collect()
1743}
1744
1745fn feedback_boost(node: &Node) -> i64 {
1746 let count = node.properties.feedback_count as f64;
1747 if count <= 0.0 {
1748 return 0;
1749 }
1750 let avg = node.properties.feedback_score / count;
1751 let confidence = (count.ln_1p() / 3.0).min(1.0);
1752 let scaled = avg * 200.0 * confidence;
1753 scaled.clamp(-300.0, 300.0).round() as i64
1754}
1755
1756fn importance_boost(node: &Node) -> i64 {
1757 let normalized_importance = if (0.0..=1.0).contains(&node.properties.importance) {
1758 node.properties.importance
1759 } else if (1.0..=6.0).contains(&node.properties.importance) {
1760 (node.properties.importance - 1.0) / 5.0
1761 } else {
1762 node.properties.importance.clamp(0.0, 1.0)
1763 };
1764 let normalized = (normalized_importance - IMPORTANCE_NEUTRAL) * 2.0;
1765 (normalized * IMPORTANCE_MAX_ABS_BOOST).round() as i64
1766}
1767
1768fn score_bm25_raw<'a>(
1769 graph: &'a GraphFile,
1770 context: &FindQueryContext<'a>,
1771 query: &str,
1772 include_features: bool,
1773 include_metadata: bool,
1774 index: Option<&Bm25Index>,
1775) -> Vec<RawCandidate<'a>> {
1776 let terms = text_norm::expand_query_terms(query);
1777 if terms.is_empty() {
1778 return Vec::new();
1779 }
1780
1781 if let Some(idx) = index {
1782 let results = idx.search(&terms, graph);
1783 return results
1784 .into_iter()
1785 .filter_map(|(node_id, score)| {
1786 let node = graph.node_by_id(&node_id)?;
1787 if !node_is_searchable(node, include_features, include_metadata) {
1788 return None;
1789 }
1790 let self_terms = node_self_document_terms(context, node);
1791 let neighbor_score =
1792 best_neighbor_bm25_score_with_index(context, node, &terms, idx);
1793 let base_score = combine_bm25_components(node, score as f64, neighbor_score);
1794 if base_score <= 0.0 {
1795 return None;
1796 }
1797 let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
1798 idx.idf.get(term).copied().unwrap_or(0.0) as f64
1799 });
1800 let proximity_boost = bm25_proximity_boost(context, node, &terms);
1801 Some(RawCandidate {
1802 node,
1803 raw_relevance: base_score * 100.0
1804 + lexical_boost as f64
1805 + proximity_boost as f64,
1806 lexical_boost: lexical_boost + proximity_boost,
1807 })
1808 })
1809 .collect();
1810 }
1811
1812 let docs: Vec<(&'a Node, Vec<String>)> = graph
1813 .nodes
1814 .iter()
1815 .filter(|node| node_is_searchable(node, include_features, include_metadata))
1816 .map(|node| (node, node_self_document_terms(context, node)))
1817 .collect();
1818
1819 if docs.is_empty() {
1820 return Vec::new();
1821 }
1822
1823 let mut df: HashMap<String, usize> = HashMap::new();
1824 for term in &terms {
1825 let mut count = 0usize;
1826 for (_, tokens) in &docs {
1827 if tokens.iter().any(|t| t == term) {
1828 count += 1;
1829 }
1830 }
1831 df.insert(term.clone(), count);
1832 }
1833
1834 let total_docs = docs.len() as f64;
1835 let avgdl = docs
1836 .iter()
1837 .map(|(_, tokens)| tokens.len() as f64)
1838 .sum::<f64>()
1839 / total_docs.max(1.0);
1840
1841 let mut idf_by_term: HashMap<String, f64> = HashMap::new();
1842 for term in &terms {
1843 let df_t = *df.get(term).unwrap_or(&0) as f64;
1844 let idf = (1.0 + (total_docs - df_t + 0.5) / (df_t + 0.5)).ln();
1845 idf_by_term.insert(term.clone(), idf);
1846 }
1847
1848 let mut scored = Vec::new();
1849
1850 for (node, self_terms) in docs {
1851 let self_score = bm25_document_score(&terms, &self_terms, &idf_by_term, avgdl);
1852 let neighbor_score = best_neighbor_bm25_score(context, node, &terms, &idf_by_term, avgdl);
1853 let base_score = combine_bm25_components(node, self_score, neighbor_score);
1854 if base_score <= 0.0 {
1855 continue;
1856 }
1857 let lexical_boost = bm25_lexical_boost_with_idf(&terms, &self_terms, |term| {
1858 idf_by_term.get(term).copied().unwrap_or(0.0)
1859 });
1860 let proximity_boost = bm25_proximity_boost(context, node, &terms);
1861 scored.push(RawCandidate {
1862 node,
1863 raw_relevance: base_score * 100.0 + lexical_boost as f64 + proximity_boost as f64,
1864 lexical_boost: lexical_boost + proximity_boost,
1865 });
1866 }
1867
1868 scored
1869}
1870
1871fn score_hybrid_raw<'a>(
1872 graph: &'a GraphFile,
1873 context: &FindQueryContext<'a>,
1874 query: &str,
1875 include_features: bool,
1876 include_metadata: bool,
1877 index: Option<&Bm25Index>,
1878 tune: FindTune,
1879) -> Vec<RawCandidate<'a>> {
1880 let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
1881 let mut matcher = Matcher::new(Config::DEFAULT);
1882
1883 let mut fuzzy_raw = HashMap::new();
1884 for node in graph
1885 .nodes
1886 .iter()
1887 .filter(|node| node_is_searchable(node, include_features, include_metadata))
1888 {
1889 if let Some(score) = score_node(context, node, query, &pattern, &mut matcher) {
1890 fuzzy_raw.insert(node.id.as_str(), score as f64);
1891 }
1892 }
1893
1894 let bm25_candidates = score_bm25_raw(
1895 graph,
1896 context,
1897 query,
1898 include_features,
1899 include_metadata,
1900 index,
1901 );
1902 let mut bm25_raw = HashMap::new();
1903 let mut lexical_boost = HashMap::new();
1904 for candidate in bm25_candidates {
1905 bm25_raw.insert(candidate.node.id.as_str(), candidate.raw_relevance);
1906 lexical_boost.insert(candidate.node.id.as_str(), candidate.lexical_boost);
1907 }
1908
1909 let fuzzy_norm = normalize_raw_scores(&fuzzy_raw);
1910 let bm25_norm = normalize_raw_scores(&bm25_raw);
1911 let total_weight = (tune.bm25 + tune.fuzzy).max(0.0001);
1912
1913 graph
1914 .nodes
1915 .iter()
1916 .filter(|node| node_is_searchable(node, include_features, include_metadata))
1917 .filter_map(|node| {
1918 let f = fuzzy_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
1919 let b = bm25_norm.get(node.id.as_str()).copied().unwrap_or(0.0);
1920 let combined = ((tune.fuzzy * f) + (tune.bm25 * b)) / total_weight;
1921 if combined <= 0.0 {
1922 return None;
1923 }
1924 Some(RawCandidate {
1925 node,
1926 raw_relevance: combined * 1000.0,
1927 lexical_boost: lexical_boost.get(node.id.as_str()).copied().unwrap_or(0),
1928 })
1929 })
1930 .collect()
1931}
1932
/// Log-normalizes raw scores into `0.0..=1.0` relative to the largest value.
///
/// Each value becomes `ln(1 + value) / ln(1 + max)`, clamped to the unit range. When the
/// maximum is not positive every entry maps to 0.
fn normalize_raw_scores<'a>(raw: &'a HashMap<&'a str, f64>) -> HashMap<&'a str, f64> {
    let mut peak = 0.0f64;
    for value in raw.values() {
        peak = peak.max(*value);
    }
    let peak_log = peak.ln_1p();

    let mut normalized = HashMap::new();
    for (&id, &value) in raw {
        let ratio = if peak_log > 0.0 {
            value.ln_1p() / peak_log
        } else {
            0.0
        };
        normalized.insert(id, ratio.clamp(0.0, 1.0));
    }
    normalized
}
1947
1948fn node_is_searchable(node: &Node, include_features: bool, include_metadata: bool) -> bool {
1949 (include_features || node.r#type != "Feature") && (include_metadata || node.r#type != "^")
1950}
1951
1952fn node_self_document_terms(context: &FindQueryContext<'_>, node: &Node) -> Vec<String> {
1953 let mut tokens = Vec::new();
1954 push_terms(&mut tokens, &node.id, BM25_ID_WEIGHT);
1955 push_terms(&mut tokens, &node.name, BM25_NAME_WEIGHT);
1956 push_terms(
1957 &mut tokens,
1958 &node.properties.description,
1959 BM25_DESCRIPTION_WEIGHT,
1960 );
1961 for alias in &node.properties.alias {
1962 push_terms(&mut tokens, alias, BM25_ALIAS_WEIGHT);
1963 }
1964 for fact in &node.properties.key_facts {
1965 push_terms(&mut tokens, fact, BM25_FACT_WEIGHT);
1966 }
1967 for note in context.notes_for(&node.id) {
1968 push_terms(&mut tokens, ¬e.body, BM25_NOTE_BODY_WEIGHT);
1969 for tag in ¬e.tags {
1970 push_terms(&mut tokens, tag, BM25_NOTE_TAG_WEIGHT);
1971 }
1972 }
1973 tokens
1974}
1975
1976fn neighbor_document_terms(neighbor: &Node) -> Vec<String> {
1977 let mut tokens = Vec::new();
1978 push_terms(&mut tokens, &neighbor.id, BM25_NEIGHBOR_WEIGHT);
1979 push_terms(&mut tokens, &neighbor.name, BM25_NEIGHBOR_WEIGHT);
1980 push_terms(
1981 &mut tokens,
1982 &neighbor.properties.description,
1983 BM25_NEIGHBOR_WEIGHT,
1984 );
1985 for alias in &neighbor.properties.alias {
1986 push_terms(&mut tokens, alias, BM25_NEIGHBOR_WEIGHT);
1987 }
1988 tokens
1989}
1990
1991fn fact_volume_normalizer(node: &Node) -> f64 {
1992 let fact_chars = node
1993 .properties
1994 .key_facts
1995 .iter()
1996 .map(|fact| fact.chars().count())
1997 .sum::<usize>() as f64;
1998 if fact_chars <= 0.0 {
1999 return 1.0;
2000 }
2001 let scaled = FACT_VOLUME_BASE_CHARS.sqrt() / fact_chars.sqrt();
2002 scaled.clamp(FACT_VOLUME_MIN_FACTOR, 1.0)
2003}
2004
2005fn bm25_document_score(
2006 query_terms: &[String],
2007 document_terms: &[String],
2008 idf_by_term: &HashMap<String, f64>,
2009 avgdl: f64,
2010) -> f64 {
2011 if query_terms.is_empty() || document_terms.is_empty() {
2012 return 0.0;
2013 }
2014 let dl = document_terms.len() as f64;
2015 if dl <= 0.0 {
2016 return 0.0;
2017 }
2018 let mut score = 0.0;
2019 for term in query_terms {
2020 let tf = document_terms.iter().filter(|token| *token == term).count() as f64;
2021 if tf <= 0.0 {
2022 continue;
2023 }
2024 let idf = idf_by_term.get(term).copied().unwrap_or(0.0);
2025 if idf <= 0.0 {
2026 continue;
2027 }
2028 let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
2029 score += idf * (tf * (BM25_K1 + 1.0) / denom);
2030 }
2031 score
2032}
2033
2034fn best_neighbor_bm25_score(
2035 context: &FindQueryContext<'_>,
2036 node: &Node,
2037 query_terms: &[String],
2038 idf_by_term: &HashMap<String, f64>,
2039 avgdl: f64,
2040) -> f64 {
2041 context
2042 .neighbors_for(&node.id)
2043 .iter()
2044 .map(|neighbor| {
2045 let neighbor_terms = neighbor_document_terms(neighbor);
2046 bm25_document_score(query_terms, &neighbor_terms, idf_by_term, avgdl)
2047 })
2048 .fold(0.0f64, f64::max)
2049}
2050
2051fn best_neighbor_bm25_score_with_index(
2052 context: &FindQueryContext<'_>,
2053 node: &Node,
2054 query_terms: &[String],
2055 index: &Bm25Index,
2056) -> f64 {
2057 let avgdl = index.avg_doc_len as f64;
2058 context
2059 .neighbors_for(&node.id)
2060 .iter()
2061 .map(|neighbor| {
2062 let neighbor_terms = neighbor_document_terms(neighbor);
2063 let dl = neighbor_terms.len() as f64;
2064 if dl <= 0.0 {
2065 return 0.0;
2066 }
2067 let mut score = 0.0;
2068 for term in query_terms {
2069 let idf = index.idf.get(term).copied().unwrap_or(0.0) as f64;
2070 if idf <= 0.0 {
2071 continue;
2072 }
2073 let tf = neighbor_terms.iter().filter(|token| *token == term).count() as f64;
2074 if tf <= 0.0 {
2075 continue;
2076 }
2077 let denom = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (dl / avgdl.max(1.0)));
2078 score += idf * (tf * (BM25_K1 + 1.0) / denom);
2079 }
2080 score
2081 })
2082 .fold(0.0f64, f64::max)
2083}
2084
2085fn combine_bm25_components(node: &Node, self_score: f64, neighbor_score: f64) -> f64 {
2086 let combined =
2087 BM25_SELF_CONTEXT_WEIGHT * self_score + BM25_NEIGHBOR_CONTEXT_WEIGHT * neighbor_score;
2088 combined * fact_volume_normalizer(node)
2089}
2090
2091fn push_terms(target: &mut Vec<String>, value: &str, weight: usize) {
2092 if value.is_empty() {
2093 return;
2094 }
2095 let terms = tokenize(value);
2096 for _ in 0..weight {
2097 target.extend(terms.iter().cloned());
2098 }
2099}
2100
2101fn tokenize(text: &str) -> Vec<String> {
2102 text_norm::tokenize(text)
2103}
2104
2105fn rewrite_query(query: &str) -> String {
2106 text_norm::expand_query_terms(query).join(" ")
2107}
2108
2109fn bm25_lexical_boost_with_idf<F>(
2110 query_terms: &[String],
2111 document_terms: &[String],
2112 idf_for: F,
2113) -> i64
2114where
2115 F: Fn(&str) -> f64,
2116{
2117 if query_terms.is_empty() || document_terms.is_empty() {
2118 return 0;
2119 }
2120 if query_terms.len() > 1 && contains_token_phrase(document_terms, query_terms) {
2121 return BM25_PHRASE_MATCH_BOOST;
2122 }
2123 let document_vocab: HashSet<&str> = document_terms.iter().map(String::as_str).collect();
2124 let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2125 let mut total_idf = 0.0;
2126 let mut matched_idf = 0.0;
2127 let mut matched_terms = 0i64;
2128 for term in query_vocab {
2129 let idf = idf_for(term).max(0.0);
2130 total_idf += if idf > 0.0 { idf } else { 1.0 };
2131 if document_vocab.contains(term) {
2132 matched_terms += 1;
2133 matched_idf += if idf > 0.0 { idf } else { 1.0 };
2134 }
2135 }
2136 if matched_terms == 0 {
2137 return 0;
2138 }
2139 ((matched_idf / total_idf.max(1.0)) * BM25_TOKEN_MATCH_BOOST as f64).round() as i64
2140}
2141
2142fn bm25_proximity_boost(
2143 context: &FindQueryContext<'_>,
2144 node: &Node,
2145 query_terms: &[String],
2146) -> i64 {
2147 if query_terms.len() < 2 {
2148 return 0;
2149 }
2150 let mut best_span_hits = proximity_hits_in_text(&node.id, query_terms)
2151 .max(proximity_hits_in_text(&node.name, query_terms))
2152 .max(proximity_hits_in_text(
2153 &node.properties.description,
2154 query_terms,
2155 ));
2156 for alias in &node.properties.alias {
2157 best_span_hits = best_span_hits.max(proximity_hits_in_text(alias, query_terms));
2158 }
2159 for fact in &node.properties.key_facts {
2160 best_span_hits = best_span_hits.max(proximity_hits_in_text(fact, query_terms));
2161 }
2162 for note in context.notes_for(&node.id) {
2163 best_span_hits = best_span_hits.max(proximity_hits_in_text(¬e.body, query_terms));
2164 for tag in ¬e.tags {
2165 best_span_hits = best_span_hits.max(proximity_hits_in_text(tag, query_terms));
2166 }
2167 }
2168 if best_span_hits < 2 {
2169 0
2170 } else {
2171 BM25_PROXIMITY_MATCH_BOOST + (best_span_hits as i64 - 2) * 20
2172 }
2173}
2174
2175fn proximity_hits_in_text(value: &str, query_terms: &[String]) -> usize {
2176 if value.is_empty() || query_terms.len() < 2 {
2177 return 0;
2178 }
2179 let tokens = tokenize(value);
2180 if tokens.len() < 2 {
2181 return 0;
2182 }
2183 let query_vocab: HashSet<&str> = query_terms.iter().map(String::as_str).collect();
2184 let mut best = 0usize;
2185 for start in 0..tokens.len() {
2186 let end = (start + BM25_PROXIMITY_WINDOW_TOKENS).min(tokens.len());
2187 let mut seen: HashSet<&str> = HashSet::new();
2188 for token in &tokens[start..end] {
2189 if query_vocab.contains(token.as_str()) {
2190 seen.insert(token.as_str());
2191 }
2192 }
2193 best = best.max(seen.len());
2194 }
2195 best
2196}
2197
/// Reports whether `query_terms` occurs as a contiguous run inside `document_terms`.
/// An empty query, or one longer than the document, never matches.
fn contains_token_phrase(document_terms: &[String], query_terms: &[String]) -> bool {
    let phrase_len = query_terms.len();
    if phrase_len == 0 || phrase_len > document_terms.len() {
        return false;
    }
    let last_start = document_terms.len() - phrase_len;
    (0..=last_start).any(|start| document_terms[start..start + phrase_len] == *query_terms)
}
2206
2207fn score_node(
2208 context: &FindQueryContext<'_>,
2209 node: &Node,
2210 query: &str,
2211 pattern: &Pattern,
2212 matcher: &mut Matcher,
2213) -> Option<u32> {
2214 let mut primary_score = 0;
2215 let mut primary_hits = 0;
2216
2217 let id_score = score_primary_field(query, pattern, matcher, &node.id, 5);
2218 if id_score > 0 {
2219 primary_hits += 1;
2220 }
2221 primary_score += id_score;
2222
2223 let name_score = score_primary_field(query, pattern, matcher, &node.name, 4);
2224 if name_score > 0 {
2225 primary_hits += 1;
2226 }
2227 primary_score += name_score;
2228
2229 for alias in &node.properties.alias {
2230 let alias_score = score_primary_field(query, pattern, matcher, alias, 4);
2231 if alias_score > 0 {
2232 primary_hits += 1;
2233 }
2234 primary_score += alias_score;
2235 }
2236
2237 let mut contextual_score = score_secondary_field(
2238 query,
2239 pattern,
2240 matcher,
2241 &node.properties.description,
2242 FUZZY_DESCRIPTION_WEIGHT,
2243 );
2244 let mut facts_score = 0;
2245 for fact in &node.properties.key_facts {
2246 facts_score += score_secondary_field(query, pattern, matcher, fact, FUZZY_FACT_WEIGHT);
2247 }
2248 let facts_factor = fact_volume_normalizer(node);
2249 contextual_score += ((facts_score as f64) * facts_factor).round() as u32;
2250 contextual_score += score_notes_context(context, node, query, pattern, matcher);
2251
2252 let neighbor_context = score_neighbor_context(context, node, query, pattern, matcher)
2253 .min(FUZZY_NEIGHBOR_CONTEXT_CAP);
2254 contextual_score += neighbor_context / FUZZY_NEIGHBOR_CONTEXT_DIVISOR;
2255
2256 if primary_hits == 0 {
2257 contextual_score /= FUZZY_NO_PRIMARY_CONTEXT_DIVISOR;
2258 }
2259
2260 let total = primary_score + contextual_score;
2261 (total > 0).then_some(total)
2262}
2263
2264fn score_notes_context(
2265 context: &FindQueryContext<'_>,
2266 node: &Node,
2267 query: &str,
2268 pattern: &Pattern,
2269 matcher: &mut Matcher,
2270) -> u32 {
2271 let mut total = 0;
2272 for note in context.notes_for(&node.id) {
2273 total += score_secondary_field(query, pattern, matcher, ¬e.body, FUZZY_NOTE_BODY_WEIGHT);
2274 for tag in ¬e.tags {
2275 total += score_secondary_field(query, pattern, matcher, tag, FUZZY_NOTE_TAG_WEIGHT);
2276 }
2277 }
2278 total
2279}
2280
2281fn score_neighbor_context(
2282 context: &FindQueryContext<'_>,
2283 node: &Node,
2284 query: &str,
2285 pattern: &Pattern,
2286 matcher: &mut Matcher,
2287) -> u32 {
2288 let mut best = 0;
2289
2290 for neighbor in context.neighbors_for(&node.id) {
2291 let mut score = score_secondary_field(query, pattern, matcher, &neighbor.id, 1)
2292 + score_secondary_field(query, pattern, matcher, &neighbor.name, 1)
2293 + score_secondary_field(query, pattern, matcher, &neighbor.properties.description, 1);
2294
2295 for alias in &neighbor.properties.alias {
2296 score += score_secondary_field(query, pattern, matcher, alias, 1);
2297 }
2298
2299 best = best.max(score);
2300 }
2301
2302 best
2303}
2304
2305fn score_field(pattern: &Pattern, matcher: &mut Matcher, value: &str) -> Option<u32> {
2306 if value.is_empty() {
2307 return None;
2308 }
2309 let mut buf = Vec::new();
2310 let haystack = Utf32Str::new(value, &mut buf);
2311 pattern.score(haystack, matcher)
2312}
2313
2314fn score_primary_field(
2315 query: &str,
2316 pattern: &Pattern,
2317 matcher: &mut Matcher,
2318 value: &str,
2319 weight: u32,
2320) -> u32 {
2321 let bonus = textual_bonus(query, value);
2322 let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2323 if bonus == 0 && fuzzy == 0 {
2324 return 0;
2325 }
2326 (fuzzy + bonus) * weight
2327}
2328
2329fn score_secondary_field(
2330 query: &str,
2331 pattern: &Pattern,
2332 matcher: &mut Matcher,
2333 value: &str,
2334 weight: u32,
2335) -> u32 {
2336 let bonus = textual_bonus(query, value);
2337 let fuzzy = score_field(pattern, matcher, value).unwrap_or(0);
2338 if bonus == 0 && fuzzy == 0 {
2339 return 0;
2340 }
2341 (fuzzy + bonus / 2) * weight
2342}
2343
2344fn textual_bonus(query: &str, value: &str) -> u32 {
2345 let query = query.trim().to_lowercase();
2346 let value = value.to_lowercase();
2347
2348 if value == query {
2349 return 400;
2350 }
2351 if value.contains(&query) {
2352 return 200;
2353 }
2354
2355 query
2356 .split_whitespace()
2357 .map(|token| {
2358 if value.contains(token) {
2359 80
2360 } else if is_subsequence(token, &value) {
2361 40
2362 } else {
2363 0
2364 }
2365 })
2366 .sum()
2367}
2368
/// Reports whether the characters of `needle` appear in `haystack` in the
/// same order, not necessarily adjacent.
///
/// An empty `needle` is never considered a subsequence.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    // One shared cursor over the haystack: each needle character resumes the
    // scan where the previous one stopped, which enforces ordering.
    let mut remaining = haystack.chars();

    !needle.is_empty()
        && needle
            .chars()
            .all(|wanted| remaining.any(|seen| seen == wanted))
}
2391
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Concept` node with the given textual fields and ranking
    /// metadata; everything else keeps its default value.
    fn make_node(
        id: &str,
        name: &str,
        description: &str,
        key_facts: &[&str],
        alias: &[&str],
        importance: f64,
        feedback_score: f64,
        feedback_count: u64,
    ) -> Node {
        let mut props = crate::graph::NodeProperties::default();
        props.description = description.to_owned();
        props.key_facts = key_facts.iter().copied().map(str::to_owned).collect();
        props.alias = alias.iter().copied().map(str::to_owned).collect();
        props.importance = importance;
        props.feedback_score = feedback_score;
        props.feedback_count = feedback_count;

        Node {
            id: id.to_owned(),
            r#type: "Concept".to_owned(),
            name: name.to_owned(),
            properties: props,
            source_files: Vec::new(),
        }
    }

    /// Builds an edge with default properties.
    fn make_edge(source_id: &str, relation: &str, target_id: &str) -> Edge {
        Edge {
            source_id: String::from(source_id),
            relation: String::from(relation),
            target_id: String::from(target_id),
            properties: crate::graph::EdgeProperties::default(),
        }
    }

    /// Returns the score of the result whose node id is `id`; panics when the
    /// node is not part of `results`.
    fn score_for(results: &[ScoredNode<'_>], id: &str) -> i64 {
        let hit = results
            .iter()
            .find(|candidate| candidate.node.id == id)
            .expect("score for node");
        hit.score
    }

    #[test]
    fn textual_bonus_tiers_are_stable() {
        let cases = [
            ("abc", "abc", 400),
            ("abc", "xxabcxx", 200),
            ("abc def", "aa abc and def zz", 160),
            ("abc", "aXbYc", 40),
            ("abc", "zzz", 0),
        ];
        for (query, value, expected) in cases {
            assert_eq!(textual_bonus(query, value), expected);
        }
    }

    #[test]
    fn tokenize_handles_unicode_casefolding() {
        assert_eq!(tokenize("ŁÓDŹ smart-home"), vec!["łódź", "smart", "home"]);
    }

    #[test]
    fn bm25_lexical_boost_prefers_phrase_then_tokens() {
        let query_terms = tokenize("smart home api");
        let cases = [
            ("x smart home api y", 120),
            ("smart x api y home", 45),
            ("nothing here", 0),
        ];
        for (document, expected) in cases {
            let doc_terms = tokenize(document);
            assert_eq!(
                bm25_lexical_boost_with_idf(&query_terms, &doc_terms, |_| 1.0),
                expected
            );
        }
    }

    #[test]
    fn score_node_uses_key_facts_and_notes_without_primary_match() {
        const QUERY: &str = "oauth2 producenta";

        // A node that only matches through its key facts and attached note.
        let gateway = make_node(
            "concept:gateway",
            "Gateway",
            "",
            &["Autentykacja OAuth2 przez konto producenta"],
            &[],
            0.5,
            0.0,
            0,
        );
        let mut graph = GraphFile::new("test");
        graph.nodes.push(gateway.clone());
        graph.notes.push(crate::graph::Note {
            id: "note:oauth".to_owned(),
            node_id: gateway.id.clone(),
            body: "Token refresh przez OAuth2".to_owned(),
            tags: vec!["oauth2".to_owned()],
            ..Default::default()
        });

        let pattern = Pattern::parse(QUERY, CaseMatching::Ignore, Normalization::Smart);
        let context = FindQueryContext::build(&graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let score = score_node(&context, &gateway, QUERY, &pattern, &mut matcher);
        assert!(score.is_some_and(|value| value > 0));

        // The same node without facts or notes must not match at all.
        let bare_graph = GraphFile::new("empty");
        let bare_node = make_node("concept:gateway", "Gateway", "", &[], &[], 0.5, 0.0, 0);
        let bare_context = FindQueryContext::build(&bare_graph);
        let mut matcher = Matcher::new(Config::DEFAULT);
        let bare_score = score_node(&bare_context, &bare_node, QUERY, &pattern, &mut matcher);
        assert!(bare_score.is_none());
    }

    #[test]
    fn score_bm25_respects_importance_boost_for_equal_documents() {
        let mut graph = GraphFile::new("test");
        for (id, name, importance) in [("concept:high", "High", 1.0), ("concept:low", "Low", 0.0)] {
            graph.nodes.push(make_node(
                id,
                name,
                "smart home api",
                &[],
                &[],
                importance,
                0.0,
                0,
            ));
        }

        let results = find_all_matches_with_index(
            &graph,
            "smart home api",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );
        assert!(score_for(&results, "concept:high") > score_for(&results, "concept:low"));
    }

    #[test]
    fn bm25_prefers_self_match_over_neighbor_only_match() {
        let mut graph = GraphFile::new("test");
        let self_hit = make_node(
            "concept:self_hit",
            "Batch plugin output directory",
            "",
            &["BatchPlugin OUTPUT_DIR rule in WebLogic path"],
            &[],
            0.5,
            0.0,
            0,
        );
        let hub = make_node(
            "concept:hub",
            "Integration Hub",
            "gateway for many systems",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );
        let neighbor_hit = make_node(
            "concept:neighbor_hit",
            "BatchPlugin OUTPUT_DIR in WebLogic",
            "",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );
        graph.nodes.push(self_hit);
        graph.nodes.push(hub);
        graph.nodes.push(neighbor_hit);
        graph
            .edges
            .push(make_edge("concept:hub", "HAS", "concept:neighbor_hit"));

        let results = find_all_matches_with_index(
            &graph,
            "BatchPlugin OUTPUT_DIR WebLogic",
            true,
            false,
            FindMode::Bm25,
            None,
            None,
        );

        // The hub is found, but the node matching on its own fields outranks it.
        assert!(results.iter().any(|hit| hit.node.id == "concept:hub"));
        assert!(score_for(&results, "concept:self_hit") > score_for(&results, "concept:hub"));
    }

    #[test]
    fn link_rendering_sorts_incident_edges_by_query_relevance() {
        let mut graph = GraphFile::new("test");
        let node_specs = [
            ("concept:center", "Center", 0.5),
            ("concept:relevant", "Push notification template", 0.2),
            ("concept:irrelevant_a", "Billing ledger", 0.9),
            ("concept:irrelevant_b", "Audit trail", 0.8),
        ];
        for (id, name, importance) in node_specs {
            graph
                .nodes
                .push(make_node(id, name, "", &[], &[], importance, 0.0, 0));
        }
        // The relevant neighbor is deliberately linked last.
        for target in [
            "concept:irrelevant_a",
            "concept:irrelevant_b",
            "concept:relevant",
        ] {
            graph
                .edges
                .push(make_edge("concept:center", "HAS", target));
        }

        let center = graph.node_by_id("concept:center").expect("center node");
        let lines = render_node_link_lines(&graph, center, 2, Some("push notification template"));

        let leading_edge = lines
            .iter()
            .find(|line| line.starts_with("-> "))
            .expect("first edge line");
        assert!(leading_edge.contains("concept:relevant"));
    }

    #[test]
    fn final_score_caps_authority_boost_for_weak_relevance() {
        let weak = make_node(
            "concept:weak",
            "Weak",
            "smart home api",
            &[],
            &[],
            1.0,
            300.0,
            1,
        );
        let strong = make_node(
            "concept:strong",
            "Strong",
            "smart home api smart home api smart home api smart home api",
            &[],
            &[],
            0.5,
            0.0,
            0,
        );

        let scored = compose_scores(vec![
            RawCandidate {
                node: &weak,
                raw_relevance: 12.0,
                lexical_boost: 0,
            },
            RawCandidate {
                node: &strong,
                raw_relevance: 100.0,
                lexical_boost: 0,
            },
        ]);

        let breakdown = &scored
            .iter()
            .find(|entry| entry.node.id == "concept:weak")
            .expect("weak node")
            .breakdown;
        assert_eq!(breakdown.authority_applied, breakdown.authority_cap);
        assert!(breakdown.authority_raw > breakdown.authority_cap);
    }

    #[test]
    fn importance_and_feedback_boost_have_expected_ranges() {
        let importance_cases = [
            ("concept:high", "High", 1.0, 66),
            ("concept:low", "Low", 0.0, -66),
        ];
        for (id, name, importance, expected) in importance_cases {
            let node = make_node(id, name, "", &[], &[], importance, 0.0, 0);
            assert_eq!(importance_boost(&node), expected);
        }

        let feedback_cases = [
            ("concept:pos", "Pos", 1.0, 46),
            ("concept:neg", "Neg", -2.0, -92),
            ("concept:sat", "Sat", 300.0, 300),
        ];
        for (id, name, feedback_score, expected) in feedback_cases {
            let node = make_node(id, name, "", &[], &[], 0.5, feedback_score, 1);
            assert_eq!(feedback_boost(&node), expected);
        }
    }

    #[test]
    fn find_deduplicates_results_by_node_id_for_single_query() {
        let mut graph = GraphFile::new("test");
        let original = make_node(
            "concept:rule",
            "Business Rule",
            "Rule for billing decisions",
            &["Business rule validation"],
            &["billing rule"],
            0.5,
            0.0,
            0,
        );
        // Second record that reuses the same node id.
        let duplicate = make_node(
            "concept:rule",
            "Business Rule Duplicate",
            "Duplicate record with same id",
            &["Business rule duplicate"],
            &[],
            0.5,
            0.0,
            0,
        );
        graph.nodes.push(original);
        graph.nodes.push(duplicate);

        let results = find_all_matches_with_index(
            &graph,
            "business rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );
        let occurrences = results
            .iter()
            .filter(|hit| hit.node.id == "concept:rule")
            .count();
        assert_eq!(occurrences, 1);
    }

    #[test]
    fn hybrid_score_does_not_change_when_only_vector_weight_changes() {
        let mut graph = GraphFile::new("test");
        graph.nodes.push(make_node(
            "concept:auth",
            "Authentication Rule",
            "Business rule for authentication",
            &["auth rule"],
            &["login policy"],
            0.5,
            0.0,
            0,
        ));

        // Two tunings that differ only in the vector weight.
        let vector_on = FindTune {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 1.0,
        };
        let vector_off = FindTune {
            bm25: 0.55,
            fuzzy: 0.35,
            vector: 0.0,
        };

        let with_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&vector_on),
        );
        let no_vector = find_all_matches_with_index(
            &graph,
            "authentication rule",
            true,
            false,
            FindMode::Hybrid,
            None,
            Some(&vector_off),
        );

        assert_eq!(with_vector.len(), 1);
        assert_eq!(no_vector.len(), 1);
        assert_eq!(with_vector[0].score, no_vector[0].score);
    }

    #[test]
    fn find_hides_metadata_nodes_unless_enabled() {
        let mut graph = GraphFile::new("test");
        let mut meta = make_node(
            "^:graph_info",
            "Graph Metadata",
            "Internal metadata",
            &["graph_uuid=abc123"],
            &[],
            0.5,
            0.0,
            0,
        );
        // Turn the freshly built concept into a metadata-typed node.
        meta.r#type = "^".to_owned();
        graph.nodes.push(meta);

        let hidden = find_all_matches_with_index(
            &graph,
            "graph uuid",
            true,
            false,
            FindMode::Hybrid,
            None,
            None,
        );
        assert!(hidden.is_empty());

        let shown = find_all_matches_with_index(
            &graph,
            "graph uuid",
            true,
            true,
            FindMode::Hybrid,
            None,
            None,
        );
        assert_eq!(shown.len(), 1);
        assert_eq!(shown[0].node.id, "^:graph_info");
    }
}