norm/
lib.rs

1//! This crate provides a collection of different distance metrics on strings.
2//!
3//! This problem is sometimes referred to as "string similarity search", or
4//! more colloquially "fuzzy matching". Given a query string and a number of
5//! possible candidate strings, the goal is to:
6//!
7//! a) filter out the candidates that are too dissimilar from the query;
8//!
9//! b) rank the remaining candidates by their similarity to the query.
10//!
11//! Here both of these tasks are accomplished by implementing the [`Metric`]
12//! trait. This trait is the foundation of norm's design, and it is implemented
13//! by all of our metrics. Reading its documentation is a good place to start.
14//!
15//! # Performance
16//!
17//! Performance is a top priority for this crate. Our goal is to have the
18//! fastest implementation of every metric algorithm we provide, across all
19//! languages. [Here][bench] you can find a number of benchmarks comparing
20//! norm's metrics to each other, as well as to other popular libraries.
21//!
22//! # Examples
23//!
24//! ```rust
25//! # use core::ops::Range;
26//! use norm::fzf::{FzfParser, FzfV2};
27//! use norm::Metric;
28//!
29//! let mut fzf = FzfV2::new();
30//!
31//! let mut parser = FzfParser::new();
32//!
33//! let query = parser.parse("aa");
34//!
35//! let cities = ["Geneva", "Ulaanbaatar", "New York City", "Adelaide"];
36//!
37//! let mut results = cities
38//!     .iter()
39//!     .copied()
40//!     .filter_map(|city| fzf.distance(query, city).map(|dist| (city, dist)))
41//!     .collect::<Vec<_>>();
42//!
43//! // We sort the results by distance in ascending order, so that the best
44//! // match will be at the front of the vector.
45//! results.sort_by_key(|(_city, dist)| *dist);
46//!
47//! assert_eq!(results.len(), 2);
48//! assert_eq!(results[0].0, "Adelaide");
49//! assert_eq!(results[1].0, "Ulaanbaatar");
50//!
51//! // We can also find out which sub-strings of each candidate matched the
52//! // query.
53//!
54//! let mut ranges: Vec<Range<usize>> = Vec::new();
55//!
56//! let _ = fzf.distance_and_ranges(query, results[0].0, &mut ranges);
57//! assert_eq!(ranges.len(), 2);
58//! assert_eq!(ranges[0], 0..1); // "A" in "Adelaide"
59//! assert_eq!(ranges[1], 4..5); // "a" in "Adelaide"
60//!
61//! ranges.clear();
62//!
63//! let _ = fzf.distance_and_ranges(query, results[1].0, &mut ranges);
64//! assert_eq!(ranges.len(), 1);
65//! assert_eq!(ranges[0], 2..4); // The first "aa" in "Ulaanbaatar"
66//! ```
67//!
68//! # Feature flags
69//!
70//! - `fzf-v1`: enables the [`FzfV1`](metrics::fzf::FzfV1) metric;
71//! - `fzf-v2`: enables the [`FzfV2`](metrics::fzf::FzfV2) metric.
72//!
73//! [bench]: https://github.com/noib3/fuzzy-benches
74
75#![cfg_attr(docsrs, feature(doc_cfg))]
76#![allow(clippy::module_inception)]
77#![allow(clippy::needless_range_loop)]
78#![allow(clippy::too_many_arguments)]
79#![deny(missing_docs)]
80#![deny(rustdoc::broken_intra_doc_links)]
81#![deny(rustdoc::private_intra_doc_links)]
82
83extern crate alloc;
84
85#[cfg(feature = "__any-metric")]
86mod candidate;
87mod case_sensitivity;
88#[cfg(feature = "__any-metric")]
89mod matched_ranges;
90mod metric;
91#[cfg(feature = "__any-metric")]
92mod metrics;
93#[cfg(feature = "__any-metric")]
94mod normalize;
95#[cfg(feature = "__any-metric")]
96mod utils;
97
98#[cfg(feature = "__any-metric")]
99use candidate::{Candidate, CandidateMatches};
100pub use case_sensitivity::CaseSensitivity;
101#[cfg(feature = "__any-metric")]
102use matched_ranges::MatchedRanges;
103pub use metric::Metric;
104#[cfg(feature = "__any-metric")]
105pub use metrics::*;