find_jaccard/
find_jaccard.rs

1use find_simdoc::JaccardSearcher;
2
3fn main() {
4    let documents = vec![
5        "Welcome to Jimbocho, the town of books and curry!",
6        "Welcome to Jimbocho, the city of books and curry!",
7        "We welcome you to Jimbocho, the town of books and curry.",
8        "Welcome to the town of books and curry, Jimbocho!",
9    ];
10
11    // Creates a searcher for character trigrams (with random seed value 42).
12    let searcher = JaccardSearcher::new(3, None, Some(42))
13        .unwrap()
14        // Builds the database of binary sketches converted from input documents,
15        // where binary sketches are in the Hamming space of 20*64 dimensions.
16        .build_sketches_in_parallel(documents.iter(), 20)
17        .unwrap();
18
19    // Searches all similar pairs within radius 0.25.
20    let results = searcher.search_similar_pairs(0.25);
21    assert_eq!(results, vec![(0, 1, 0.1875), (0, 3, 0.2296875)]);
22}