use std::{
    cmp::Ordering,
    collections::{BTreeMap, HashMap},
};

use blake3::hash;
use serde::{Deserialize, Serialize};

use crate::{MemvidError, Result, types::FrameId};

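/// Bincode configuration for the current lex index encoding: fixed-width,
/// little-endian integers for a stable serialized layout.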
fn lex_config() -> impl bincode::config::Config {
    bincode::config::standard()
        .with_fixed_int_encoding()
        .with_little_endian()
}

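// Decode size limit and section-chunking parameters. Section lengths are
// measured in bytes of UTF-8: a chunk is cut once it reaches the hard limit,
// falling back to the most recent soft boundary (sentence end or newline),
// and the number of sections per document is capped.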
#[allow(clippy::cast_possible_truncation)]
const LEX_DECODE_LIMIT: usize = crate::MAX_INDEX_BYTES as usize;
const LEX_SECTION_SOFT_CHARS: usize = 900;
const LEX_SECTION_HARD_CHARS: usize = 1400;
const LEX_SECTION_MAX_COUNT: usize = 2048;

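/// Collects documents and serializes them into a `LexIndexArtifact`.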
#[derive(Default)]
pub struct LexIndexBuilder {
    documents: Vec<LexDocument>,
}

impl LexIndexBuilder {
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

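    /// Adds a document to the index. Content is tokenized, tags are copied
    /// into a sorted map, and long content is pre-chunked into sections so
    /// matches can be scored and snippeted per section.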
    pub fn add_document(
        &mut self,
        frame_id: FrameId,
        uri: &str,
        title: Option<&str>,
        content: &str,
        tags: &HashMap<String, String>,
    ) {
        let tokens = tokenize(content);
        let tags: BTreeMap<_, _> = tags.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
        let mut sections = chunk_sections(content);

        let (content_owned, content_lower) = if content.is_empty() {
            (String::new(), String::new())
        } else if sections.is_empty() {
            let owned = content.to_string();
            let lower = owned.to_ascii_lowercase();
            sections.push(LexSection {
                offset: 0,
                content: owned.clone(),
                content_lower: lower.clone(),
            });
            (owned, lower)
        } else {
            (String::new(), String::new())
        };
        self.documents.push(LexDocument {
            frame_id,
            tokens,
            tags,
            content: content_owned,
            content_lower,
            uri: Some(uri.to_string()),
            title: title.map(ToString::to_string),
            sections,
        });
    }

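    /// Backfills any missing sections, serializes the documents with the
    /// fixed-int encoding, and returns the payload together with its document
    /// count and a BLAKE3 checksum of the bytes.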
    pub fn finish(mut self) -> Result<LexIndexArtifact> {
        for document in &mut self.documents {
            document.ensure_sections();
        }
        let bytes = bincode::serde::encode_to_vec(&self.documents, lex_config())?;
        let checksum = *hash(&bytes).as_bytes();
        Ok(LexIndexArtifact {
            bytes,
            doc_count: self.documents.len() as u64,
            checksum,
        })
    }
}

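/// Serialized lex index payload plus its document count and BLAKE3 checksum.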
#[derive(Debug, Clone)]
pub struct LexIndexArtifact {
    pub bytes: Vec<u8>,
    pub doc_count: u64,
    pub checksum: [u8; 32],
}

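/// In-memory lexical index decoded from an artifact payload.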
#[derive(Debug, Clone)]
pub struct LexIndex {
    documents: Vec<LexDocument>,
}

impl LexIndex {
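    /// Decodes an index payload, trying the current fixed-int encoding first
    /// and then two legacy layouts; an attempt is only accepted if it consumes
    /// the entire slice. Returns `MemvidError::InvalidToc` when no encoding
    /// matches.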
    pub fn decode(bytes: &[u8]) -> Result<Self> {
        let new_config = bincode::config::standard()
            .with_fixed_int_encoding()
            .with_little_endian()
            .with_limit::<LEX_DECODE_LIMIT>();
        if let Ok((documents, read)) =
            bincode::serde::decode_from_slice::<Vec<LexDocument>, _>(bytes, new_config)
        {
            if read == bytes.len() {
                return Ok(Self::from_documents(documents));
            }
        }

        let legacy_fixed = bincode::config::standard()
            .with_fixed_int_encoding()
            .with_little_endian()
            .with_limit::<LEX_DECODE_LIMIT>();
        if let Ok((legacy_docs, read)) =
            bincode::serde::decode_from_slice::<Vec<LegacyLexDocument>, _>(bytes, legacy_fixed)
        {
            if read == bytes.len() {
                let documents = legacy_docs.into_iter().map(legacy_to_current).collect();
                return Ok(Self::from_documents(documents));
            }
        }

        let legacy_config = bincode::config::standard()
            .with_little_endian()
            .with_limit::<LEX_DECODE_LIMIT>();
        if let Ok((legacy_docs, read)) =
            bincode::serde::decode_from_slice::<Vec<LegacyLexDocument>, _>(bytes, legacy_config)
        {
            if read == bytes.len() {
                let documents = legacy_docs.into_iter().map(legacy_to_current).collect();
                return Ok(Self::from_documents(documents));
            }
        }

        Err(MemvidError::InvalidToc {
            reason: "unsupported lex index encoding".into(),
        })
    }

    fn from_documents(mut documents: Vec<LexDocument>) -> Self {
        for document in &mut documents {
            document.ensure_sections();
        }
        Self { documents }
    }

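    /// Tokenizes `query`, matches it against every indexed section, and
    /// returns up to `limit` hits, each with up to three snippets of roughly
    /// 160 characters.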
    #[must_use]
    pub fn search(&self, query: &str, limit: usize) -> Vec<LexSearchHit> {
        let mut query_tokens = tokenize(query);
        query_tokens.retain(|token| !token.is_empty());
        if query_tokens.is_empty() {
            return Vec::new();
        }
        let mut matches = self.compute_matches(&query_tokens, None, None);
        matches.truncate(limit);
        matches
            .into_iter()
            .map(|m| {
                let snippets = build_snippets(&m.content, &m.occurrences, 160, 3);
                LexSearchHit {
                    frame_id: m.frame_id,
                    score: m.score,
                    match_count: m.occurrences.len(),
                    snippets,
                }
            })
            .collect()
    }

    pub(crate) fn documents_mut(&mut self) -> &mut [LexDocument] {
        &mut self.documents
    }

    pub(crate) fn remove_document(&mut self, frame_id: FrameId) {
        self.documents.retain(|doc| doc.frame_id != frame_id);
    }

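    /// Scores every section of every document against `query_tokens`,
    /// optionally restricted to an exact URI or a URI-prefix scope. A single
    /// token matches by substring; multiple tokens must all be present, and a
    /// verbatim phrase match adds a large bonus. Only the best-scoring section
    /// per frame is kept.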
    pub(crate) fn compute_matches(
        &self,
        query_tokens: &[String],
        uri_filter: Option<&str>,
        scope_filter: Option<&str>,
    ) -> Vec<LexMatch> {
        if query_tokens.is_empty() {
            return Vec::new();
        }

        let mut hits = Vec::new();
        let phrase = query_tokens.join(" ");
        for document in &self.documents {
            if let Some(uri) = uri_filter {
                if !uri_matches(document.uri.as_deref(), uri) {
                    continue;
                }
            } else if let Some(scope) = scope_filter {
                match document.uri.as_deref() {
                    Some(candidate) if candidate.starts_with(scope) => {}
                    _ => continue,
                }
            }

            if document.sections.is_empty() {
                continue;
            }

            for section in &document.sections {
                let haystack = section.content_lower.as_str();
                if haystack.is_empty() {
                    continue;
                }

                let mut occurrences: Vec<(usize, usize)> = Vec::new();

                if query_tokens.len() == 1 {
                    let needle = &query_tokens[0];
                    if needle.is_empty() {
                        continue;
                    }
                    let mut start = 0usize;
                    while let Some(idx) = haystack[start..].find(needle) {
                        let local_start = start + idx;
                        let local_end = local_start + needle.len();
                        occurrences.push((local_start, local_end));
                        start = local_end;
                    }
                } else {
                    let mut all_occurrences = Vec::new();
                    let mut all_present = true;
                    for needle in query_tokens {
                        if needle.is_empty() {
                            all_present = false;
                            break;
                        }
                        let mut start = 0usize;
                        let mut found_for_token = false;
                        while let Some(idx) = haystack[start..].find(needle) {
                            found_for_token = true;
                            let local_start = start + idx;
                            let local_end = local_start + needle.len();
                            all_occurrences.push((local_start, local_end));
                            start = local_end;
                        }
                        if !found_for_token {
                            all_present = false;
                            break;
                        }
                    }
                    if !all_present {
                        continue;
                    }
                    occurrences = all_occurrences;
                }

                if occurrences.is_empty() {
                    continue;
                }

                occurrences.sort_by_key(|(start, _)| *start);
                #[allow(clippy::cast_precision_loss)]
                let mut score = occurrences.len() as f32;
                if !phrase.is_empty() && section.content_lower.contains(&phrase) {
                    score += 1000.0;
                }
                hits.push(LexMatch {
                    frame_id: document.frame_id,
                    score,
                    occurrences,
                    content: section.content.clone(),
                    uri: document.uri.clone(),
                    title: document.title.clone(),
                    chunk_offset: section.offset,
                });
            }
        }

        hits.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal));

        let mut seen_frames: std::collections::HashSet<FrameId> =
            std::collections::HashSet::new();
        let mut deduped = Vec::with_capacity(hits.len());
        for hit in hits {
            if seen_frames.insert(hit.frame_id) {
                deduped.push(hit);
            }
        }
        deduped
    }
}

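/// Case-insensitive URI match: exact when `expected` contains a fragment
/// (`#`), prefix match otherwise.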
fn uri_matches(candidate: Option<&str>, expected: &str) -> bool {
    let Some(uri) = candidate else {
        return false;
    };
    if expected.contains('#') {
        uri.eq_ignore_ascii_case(expected)
    } else {
        let expected_lower = expected.to_ascii_lowercase();
        let candidate_lower = uri.to_ascii_lowercase();
        candidate_lower.starts_with(&expected_lower)
    }
}

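/// An indexed document. Searchable text lives in `sections`; the flat
/// `content`/`content_lower` fields are kept for older artifacts and are
/// backfilled into a section on decode.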
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct LexDocument {
    pub(crate) frame_id: FrameId,
    tokens: Vec<String>,
    tags: BTreeMap<String, String>,
    #[serde(default)]
    content: String,
    #[serde(default)]
    pub(crate) content_lower: String,
    #[serde(default)]
    pub(crate) uri: Option<String>,
    #[serde(default)]
    pub(crate) title: Option<String>,
    #[serde(default)]
    sections: Vec<LexSection>,
}

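/// A contiguous slice of a document's content: its byte offset plus the
/// original and lowercased text.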
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct LexSection {
    pub(crate) offset: usize,
    #[serde(default)]
    pub(crate) content: String,
    #[serde(default)]
    pub(crate) content_lower: String,
}

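/// Document layout written before sections and lowercase content were
/// introduced; kept so older indexes still decode.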
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LegacyLexDocument {
    frame_id: FrameId,
    tokens: Vec<String>,
    tags: BTreeMap<String, String>,
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
    uri: Option<String>,
    #[serde(default)]
    title: Option<String>,
}

impl LexDocument {
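    /// Backfills `content_lower` and a single covering section when
    /// `sections` is empty and `content` is not.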
    fn ensure_sections(&mut self) {
        if !self.sections.is_empty() {
            return;
        }

        if self.content.is_empty() {
            return;
        }

        if self.content_lower.is_empty() {
            self.content_lower = self.content.to_ascii_lowercase();
        }

        self.sections.push(LexSection {
            offset: 0,
            content: self.content.clone(),
            content_lower: self.content_lower.clone(),
        });
    }
}

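/// Converts a legacy document into the current layout, deriving the lowercase
/// content and a single covering section.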
fn legacy_to_current(legacy: LegacyLexDocument) -> LexDocument {
    let content = legacy.content.unwrap_or_default();
    let content_lower = content.to_ascii_lowercase();
    let sections = if content.is_empty() {
        Vec::new()
    } else {
        vec![LexSection {
            offset: 0,
            content: content.clone(),
            content_lower: content_lower.clone(),
        }]
    };
    LexDocument {
        frame_id: legacy.frame_id,
        tokens: legacy.tokens,
        tags: legacy.tags,
        content,
        content_lower,
        uri: legacy.uri,
        title: legacy.title,
        sections,
    }
}

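/// A search result exposed to callers: frame id, score, occurrence count, and
/// rendered snippets.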
#[derive(Debug, Clone)]
pub struct LexSearchHit {
    pub frame_id: FrameId,
    pub score: f32,
    pub match_count: usize,
    pub snippets: Vec<String>,
}

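/// A per-section match produced by `compute_matches`: the owning frame, its
/// score, occurrence byte ranges, and the section text needed for snippets.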
#[derive(Debug, Clone)]
pub(crate) struct LexMatch {
    pub frame_id: FrameId,
    pub score: f32,
    pub occurrences: Vec<(usize, usize)>,
    pub content: String,
    pub uri: Option<String>,
    pub title: Option<String>,
    pub chunk_offset: usize,
}

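/// Splits `input` on characters outside the token alphabet and lowercases the
/// pieces, keeping only those containing at least one alphanumeric character.
/// `&`, `@`, `+`, `/`, and `_` count as token characters so strings like
/// "n&m" survive intact.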
fn tokenize(input: &str) -> Vec<String> {
    input
        .split(|c: char| !is_token_char(c))
        .filter_map(|token| {
            if token.chars().any(char::is_alphanumeric) {
                Some(token.to_lowercase())
            } else {
                None
            }
        })
        .collect()
}

fn is_token_char(ch: char) -> bool {
    ch.is_alphanumeric() || matches!(ch, '&' | '@' | '+' | '/' | '_')
}

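/// Renders snippet slices as single-line strings (newlines replaced with
/// spaces).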
fn build_snippets(
    content: &str,
    occurrences: &[(usize, usize)],
    window: usize,
    max_snippets: usize,
) -> Vec<String> {
    compute_snippet_slices(content, occurrences, window, max_snippets)
        .into_iter()
        .map(|(start, end)| content[start..end].replace('\n', " "))
        .collect()
}

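/// Splits `content` into `LexSection`s. Short content becomes a single
/// section; longer content is cut near `LEX_SECTION_HARD_CHARS` bytes,
/// preferring the most recent sentence or newline boundary, up to
/// `LEX_SECTION_MAX_COUNT` sections (the final section absorbs any remainder).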
fn chunk_sections(content: &str) -> Vec<LexSection> {
    if content.is_empty() {
        return Vec::new();
    }

    if content.len() <= LEX_SECTION_HARD_CHARS {
        return vec![LexSection {
            offset: 0,
            content: content.to_string(),
            content_lower: content.to_ascii_lowercase(),
        }];
    }

    let mut sections: Vec<LexSection> = Vec::new();
    let mut chunk_start = 0usize;
    let mut last_soft_break = None;
    let mut iter = content.char_indices().peekable();

    while let Some((idx, ch)) = iter.next() {
        let char_end = idx + ch.len_utf8();
        let current_len = char_end.saturating_sub(chunk_start);
        let next_char = iter.peek().map(|(_, next)| *next);

        if is_soft_boundary(ch, next_char) {
            last_soft_break = Some(char_end);
            if current_len < LEX_SECTION_SOFT_CHARS {
                continue;
            }
        }

        if current_len < LEX_SECTION_HARD_CHARS {
            continue;
        }

        let mut split_at = last_soft_break.unwrap_or(char_end);
        if split_at <= chunk_start {
            split_at = char_end;
        }

        push_section(&mut sections, content, chunk_start, split_at);
        chunk_start = split_at;
        last_soft_break = None;

        if sections.len() >= LEX_SECTION_MAX_COUNT {
            break;
        }
    }

    if chunk_start < content.len() {
        if sections.len() >= LEX_SECTION_MAX_COUNT {
            if let Some(last) = sections.last_mut() {
                let slice = &content[last.offset..];
                last.content = slice.to_string();
                last.content_lower = slice.to_ascii_lowercase();
            }
        } else {
            push_section(&mut sections, content, chunk_start, content.len());
        }
    }

    if sections.is_empty() {
        sections.push(LexSection {
            offset: 0,
            content: content.to_string(),
            content_lower: content.to_ascii_lowercase(),
        });
    }

    sections
}

fn push_section(sections: &mut Vec<LexSection>, content: &str, start: usize, end: usize) {
    if end <= start {
        return;
    }

    let slice = &content[start..end];
    sections.push(LexSection {
        offset: start,
        content: slice.to_string(),
        content_lower: slice.to_ascii_lowercase(),
    });
}

fn is_soft_boundary(ch: char, next: Option<char>) -> bool {
    match ch {
        '.' | '!' | '?' => next.is_none_or(char::is_whitespace),
        '\n' => true,
        _ => false,
    }
}

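/// Computes up to `max_snippets` byte ranges around the given occurrences,
/// widening each toward sentence boundaries and merging ranges that start
/// within 20 bytes of the previous range's end. Falls back to a leading
/// window of `window` characters when there are no usable occurrences.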
pub(crate) fn compute_snippet_slices(
    content: &str,
    occurrences: &[(usize, usize)],
    window: usize,
    max_snippets: usize,
) -> Vec<(usize, usize)> {
    if content.is_empty() {
        return Vec::new();
    }

    if occurrences.is_empty() {
        let end = advance_boundary(content, 0, window);
        return vec![(0, end)];
    }

    let mut merged: Vec<(usize, usize)> = Vec::new();
    for &(start, end) in occurrences {
        let mut snippet_start = start.saturating_sub(window / 2);
        let mut snippet_end = (end + window / 2).min(content.len());

        if let Some(adj) = sentence_start_before(content, snippet_start) {
            snippet_start = adj;
        }
        if let Some(adj) = sentence_end_after(content, snippet_end) {
            snippet_end = adj;
        }

        snippet_start = prev_char_boundary(content, snippet_start);
        snippet_end = next_char_boundary(content, snippet_end);

        if snippet_end <= snippet_start {
            continue;
        }

        if let Some(last) = merged.last_mut() {
            if snippet_start <= last.1 + 20 {
                last.1 = last.1.max(snippet_end);
                continue;
            }
        }

        merged.push((
            snippet_start.min(content.len()),
            snippet_end.min(content.len()),
        ));
        if merged.len() >= max_snippets {
            break;
        }
    }

    if merged.is_empty() {
        let end = advance_boundary(content, 0, window);
        merged.push((0, end));
    }

    merged
}

fn sentence_start_before(content: &str, idx: usize) -> Option<usize> {
    if idx == 0 {
        return Some(0);
    }
    let mut idx = idx.min(content.len());
    idx = prev_char_boundary(content, idx);
    let mut candidate = None;
    for (pos, ch) in content[..idx].char_indices() {
        if matches!(ch, '.' | '!' | '?' | '\n') {
            candidate = Some(pos + ch.len_utf8());
        }
    }
    candidate.map(|pos| {
        let mut pos = next_char_boundary(content, pos);
        while pos < content.len() && content.as_bytes()[pos].is_ascii_whitespace() {
            pos += 1;
        }
        prev_char_boundary(content, pos)
    })
}

fn sentence_end_after(content: &str, idx: usize) -> Option<usize> {
    if idx >= content.len() {
        return Some(content.len());
    }
    let mut idx = idx;
    idx = prev_char_boundary(content, idx);
    for (offset, ch) in content[idx..].char_indices() {
        let global = idx + offset;
        if matches!(ch, '.' | '!' | '?') {
            return Some(next_char_boundary(content, global + ch.len_utf8()));
        }
        if ch == '\n' {
            return Some(global);
        }
    }
    None
}

fn prev_char_boundary(content: &str, mut idx: usize) -> usize {
    if idx > content.len() {
        idx = content.len();
    }
    while idx > 0 && !content.is_char_boundary(idx) {
        idx -= 1;
    }
    idx
}

fn next_char_boundary(content: &str, mut idx: usize) -> usize {
    if idx > content.len() {
        idx = content.len();
    }
    while idx < content.len() && !content.is_char_boundary(idx) {
        idx += 1;
    }
    idx
}

fn advance_boundary(content: &str, start: usize, mut window: usize) -> usize {
    if start >= content.len() {
        return content.len();
    }
    let mut last = content.len();
    for (offset, _) in content[start..].char_indices() {
        if window == 0 {
            return start + offset;
        }
        last = start + offset;
        window -= 1;
    }
    content.len().max(last)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn builder_produces_artifact() {
        let mut builder = LexIndexBuilder::new();
        let mut tags = HashMap::new();
        tags.insert("source".into(), "test".into());
        builder.add_document(0, "mv2://docs/one", Some("Doc One"), "hello world", &tags);
        builder.add_document(
            1,
            "mv2://docs/two",
            Some("Doc Two"),
            "rust systems",
            &HashMap::new(),
        );

        let artifact = builder.finish().expect("finish");
        assert_eq!(artifact.doc_count, 2);
        assert!(!artifact.bytes.is_empty());

        let index = LexIndex::decode(&artifact.bytes).expect("decode");
        let hits = index.search("rust", 10);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].frame_id, 1);
        assert!(hits[0].match_count >= 1);
        assert!(!hits[0].snippets.is_empty());
    }

    #[test]
    fn tokenizer_lowercases_and_filters() {
        let tokens = tokenize("Hello, Rust-lang!");
        assert_eq!(tokens, vec!["hello", "rust", "lang"]);
    }

    #[test]
    fn tokenizer_retains_connector_characters() {
        let tokens = tokenize("N&M EXPRESS LLC @ 2024");
        assert_eq!(tokens, vec!["n&m", "express", "llc", "2024"]);
    }

    #[test]
    fn compute_matches_deduplicates_by_frame_id() {
        let mut builder = LexIndexBuilder::new();

        let section1 = "Quantum computing represents a revolutionary approach to computation. \
            The fundamental principles of quantum mechanics enable quantum computers to process \
            information in ways classical computers cannot. Quantum bits or qubits can exist in \
            superposition states, allowing quantum algorithms to explore multiple solutions \
            simultaneously. This quantum parallelism offers exponential speedups for certain \
            computational problems. Researchers continue to advance quantum hardware and software. \
            The field of quantum computing is rapidly evolving with new breakthroughs. \
            Major tech companies invest heavily in quantum research and development. \
            Quantum error correction remains a significant challenge for practical quantum computers.";

        let section2 = "Applications of quantum computing span many domains including cryptography, \
            drug discovery, and optimization problems. Quantum cryptography promises unbreakable \
            encryption through quantum key distribution protocols. In the pharmaceutical industry, \
            quantum simulations could revolutionize how we discover new medicines. Quantum \
            algorithms like Shor's algorithm threaten current encryption standards. Financial \
            institutions explore quantum computing for portfolio optimization. The quantum \
            advantage may soon be demonstrated for practical real-world applications. Quantum \
            machine learning combines quantum computing with artificial intelligence techniques. \
            The future of quantum computing holds immense promise for scientific discovery.";

        let full_content = format!("{} {}", section1, section2);
        assert!(
            full_content.len() > 1400,
            "Content should be long enough to create multiple sections"
        );

        builder.add_document(
            42,
            "mv2://docs/quantum",
            Some("Quantum Computing Overview"),
            &full_content,
            &HashMap::new(),
        );

        let artifact = builder.finish().expect("finish should succeed");
        let index = LexIndex::decode(&artifact.bytes).expect("decode should succeed");

        let query_tokens = tokenize("quantum");
        let matches = index.compute_matches(&query_tokens, None, None);

        let frame_ids: Vec<_> = matches.iter().map(|m| m.frame_id).collect();
        let unique_frame_ids: std::collections::HashSet<_> = frame_ids.iter().copied().collect();

        assert_eq!(
            frame_ids.len(),
            unique_frame_ids.len(),
            "Results should not contain duplicate frame_ids. Found: {:?}",
            frame_ids
        );

        assert_eq!(matches.len(), 1, "Should have exactly one match");
        assert_eq!(matches[0].frame_id, 42, "Match should be for frame_id 42");
        assert!(matches[0].score > 0.0, "Match should have a positive score");
    }

    #[test]
    fn compute_matches_keeps_highest_score_per_frame() {
        let mut builder = LexIndexBuilder::new();

        let section1 = "This is the first section with one target mention. \
            It contains various other words to pad the content and make it long enough \
            to be split into multiple sections by the chunking algorithm. We need quite \
            a bit of text here to ensure the sections are created properly. The content \
            continues with more filler text about various topics. Keep writing to reach \
            the section boundary. More text follows to ensure we cross the soft limit. \
            This should be enough to trigger section creation at the boundary point.";

        let section2 = "The second section has target target target multiple times. \
            Target appears here repeatedly: target target target target. This section \
            should score higher because it has more occurrences of the search term target. \
            We mention target again to boost the score further. Target target target. \
            The abundance of target keywords makes this section rank higher in relevance.";

        let full_content = format!("{} {}", section1, section2);

        builder.add_document(
            99,
            "mv2://docs/multi-section",
            Some("Multi-Section Document"),
            &full_content,
            &HashMap::new(),
        );

        let artifact = builder.finish().expect("finish");
        let index = LexIndex::decode(&artifact.bytes).expect("decode");

        let query_tokens = tokenize("target");
        let matches = index.compute_matches(&query_tokens, None, None);

        assert_eq!(
            matches.len(),
            1,
            "Should have exactly one deduplicated match"
        );

        assert!(
            matches[0].score >= 5.0,
            "Should keep the highest-scoring match, score was: {}",
            matches[0].score
        );
    }
}