drug_extraction_core/
lib.rs

1//! This library exists to support the CLI and Web UI applications.
2//!
3//! It exposes a limited API that could be utilized by other applications.
4//!
5//! HOWEVER, its development will always be driven by the needs of the CLI and Web UI applications.
6//!
7//! The main functionality is encompassed in [`Drug`], [`Search`], and [`SearchOutput`].
8
9use csv::WriterBuilder;
10use itertools::Itertools;
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13use std::error;
14use std::error::Error;
15use std::fmt;
16use std::fmt::Display;
17use std::str::FromStr;
18
19use strsim::{damerau_levenshtein, jaro_winkler, levenshtein, osa_distance, sorensen_dice};
20
/// Error returned when a provided value is not one of the accepted options.
///
/// The type carries no payload; in this file it is produced by the `FromStr`
/// implementations of [`Algorithm`] and [`OutputFormat`].
#[derive(Debug)]
pub struct ValueError;
24
25impl Display for ValueError {
26    /// Formatting for ValueError
27    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
28        write!(f, "Received an unexpected value")
29    }
30}
31
/// Marks [`ValueError`] as a standard error type so it can be boxed as
/// `dyn Error` and propagated with `?`.
impl error::Error for ValueError {}
34
/// Crate-local shorthand for `std::result::Result<T, ValueError>`.
///
/// Note that this shadows the prelude `Result` inside this module, so
/// two-parameter results must be spelled `std::result::Result<T, E>`.
type Result<T> = std::result::Result<T, ValueError>;
37
38/// Damerau Levenshtein Algorithm
39/// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
40fn my_damerau(a: &str, b: &str) -> f64 {
41    damerau_levenshtein(a, b) as f64
42}
43
44/// Levenshtein Algorithm
45/// https://en.wikipedia.org/wiki/Levenshtein_distance
46fn my_leven(a: &str, b: &str) -> f64 {
47    levenshtein(a, b) as f64
48}
49
50/// Optimal &str Alignment Algorithm (OSA)
51/// https://en.wikipedia.org/wiki/Optimal_string_alignment
52fn my_osa(a: &str, b: &str) -> f64 {
53    osa_distance(a, b) as f64
54}
55
56/// Jaro-Winkler Algorithm
57/// https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
58fn my_jw(a: &str, b: &str) -> f64 {
59    jaro_winkler(a, b) as f64
60}
61
62/// Sorensen-Dice Algorithm
63/// https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
64fn my_sd(a: &str, b: &str) -> f64 {
65    sorensen_dice(a, b) as f64
66}
67
68/// Initialize the distance function based on the selected [`Algorithm`]
69pub fn initialize_distance(a: Algorithm) -> fn(&str, &str) -> f64 {
70    match a {
71        Algorithm::DAMERAU => my_damerau,
72        Algorithm::LEVENSHTEIN => my_leven,
73        Algorithm::JAROWINKLER => my_jw,
74        Algorithm::OSA => my_osa,
75        Algorithm::SORENSENDICE => my_sd,
76    }
77}
78
/// The string-comparison algorithms supported by the library.
///
/// The first, second and fourth variants below report an edit distance; the
/// remaining two report a similarity score (see [`Algorithm::is_edits()`]).
#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
pub enum Algorithm {
    /// Damerau Levenshtein Algorithm (edit distance)
    DAMERAU,
    /// Levenshtein Algorithm (edit distance)
    LEVENSHTEIN,
    /// Jaro Winkler Algorithm (similarity score)
    JAROWINKLER,
    /// Optimal String Alignment Algorithm (OSA) (edit distance)
    OSA,
    /// Sorensen Dice Algorithm (similarity score)
    SORENSENDICE,
}
93
94impl Algorithm {
95    /// Utility function to see if the select Algorithm is returning an edit distance or similarity score.
96    pub fn is_edits(&self) -> bool {
97        match self {
98            Algorithm::OSA | Algorithm::DAMERAU | Algorithm::LEVENSHTEIN => true,
99            Algorithm::JAROWINKLER | Algorithm::SORENSENDICE => false,
100        }
101    }
102
103    /// Utility function to get a list of the available algorithms as a string
104    /// This is used for the CLI
105    pub fn options() -> &'static [&'static str] {
106        &[
107            "Levenshtein",
108            "Damerau",
109            "OSA",
110            "JaroWinkler",
111            "SorensenDice",
112        ]
113    }
114}
115
116impl FromStr for Algorithm {
117    type Err = ValueError;
118    /// Parses an Algorithm type from a string reference.
119    /// Only uses the first character of the string.
120    fn from_str(s: &str) -> Result<Algorithm> {
121        match s.to_uppercase().chars().next().unwrap_or('L') {
122            'L' => Ok(Algorithm::LEVENSHTEIN),
123            'D' => Ok(Algorithm::DAMERAU),
124            'O' => Ok(Algorithm::OSA),
125            'J' => Ok(Algorithm::JAROWINKLER),
126            'S' => Ok(Algorithm::SORENSENDICE),
127            _ => Err(ValueError),
128        }
129    }
130}
131
132impl ToString for Algorithm {
133    /// Converts an Algorithm type to a string representation.
134    fn to_string(&self) -> String {
135        match self {
136            Algorithm::DAMERAU => String::from("Damerau"),
137            Algorithm::LEVENSHTEIN => String::from("Levenshtein"),
138            Algorithm::OSA => String::from("OSA"),
139            Algorithm::JAROWINKLER => String::from("JaroWinkler"),
140            Algorithm::SORENSENDICE => String::from("SorensenDice"),
141        }
142    }
143}
144
/// One match produced by scanning text with a [`SimpleSearch`].
///
/// Simple Search focuses on comparing strings which could be anything provided by the user.
///
/// Simple Search uses [`SimpleSearch`] and [`SimpleResult`] to hold the input and output data.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct SimpleResult {
    /// The ID of the record being searched.
    /// Can be empty if the record flag was not used in the CLI.
    pub record_id: Option<String>,
    /// The algorithm used to calculate the score.
    pub algorithm: Algorithm,
    /// The number of edits between the matched words.
    ///
    /// Can be empty if the [`Algorithm::is_edits()`] function returns false (thus the algorithm does not have an edit distance).
    pub edits: Option<i32>,
    /// The similarity score between the matched words.
    ///
    /// This will be computed regardless of the [`Algorithm::is_edits()`] function status since edit distance can be converted to similarity.
    pub similarity: f64,
    /// The search term (the target word, as supplied to the search).
    pub search_term: String,
    /// The word from the scanned text that was compared against the search term.
    pub matched_term: String,
}
170
/// The desired output format, consumed by [`format()`].
#[derive(Clone, Copy, Debug)]
pub enum OutputFormat {
    /// One JSON document per result.
    JSONL,
    /// Comma-separated values, one line per result.
    CSV,
}
177
178impl FromStr for OutputFormat {
179    type Err = ValueError;
180    /// Parses an OutputFormat type from a string reference.
181    fn from_str(s: &str) -> Result<OutputFormat> {
182        match s.to_uppercase().as_str() {
183            "JSONL" => Ok(OutputFormat::JSONL),
184            "CSV" => Ok(OutputFormat::CSV),
185            _ => Err(ValueError),
186        }
187    }
188}
189
190/// Format the data in the desired output format.
191/// This is used for the CLI and the web API.
192/// The output format is determined by the [`OutputFormat`] enum.
193/// This uses serde_json::to_string_pretty for JSONL and csv::WriterBuilder for CSV.
194/// The output is returned as a Vector of Strings.
195/// For CSV, the first row is the column headers and the vector items will need to be string-joined with a comma.
196///
197/// Examples:
198/// TODO: Add examples
199pub fn format(
200    data: Vec<SearchOutput>,
201    format: OutputFormat,
202) -> std::result::Result<Vec<String>, Box<dyn Error>> {
203    match format {
204        OutputFormat::JSONL => {
205            Ok(data
206                .iter()
207                .map(|x| match x {
208                    SearchOutput::DrugResult(y) => serde_json::to_string(y)
209                        .expect("could not deserialize drug result to string"),
210                    SearchOutput::SimpleResult(y) => serde_json::to_string(y)
211                        .expect("could not deserialize simple result to string"),
212                })
213                .collect::<Vec<String>>())
214        }
215        OutputFormat::CSV => {
216            let mut wtr = WriterBuilder::new().has_headers(false).from_writer(vec![]);
217            for row in data {
218                wtr.serialize(row)?;
219            }
220            let csv_data = String::from_utf8(wtr.into_inner()?)?;
221            Ok(csv_data
222                .split('\n')
223                .map(|x| x.to_string())
224                .filter(|x| !x.is_empty())
225                .collect::<Vec<String>>())
226        }
227    }
228}
229
/// A struct to hold the input into a Simple Search.
///
/// Simple Search focuses on comparing strings which could be anything provided by the user.
///
/// Simple Search uses [`SimpleSearch`] and [`SimpleResult`] to hold the input and output data.
pub struct SimpleSearch {
    /// The [`Algorithm`] to use.
    pub algorithm: Algorithm,
    /// The distance function to use, based on the [`Algorithm`] selected
    /// (see [`initialize_distance()`]).
    pub distance: fn(&str, &str) -> f64,
    /// The **maximum** number of edits allowed.
    ///
    /// This *can* be None if the user does not want to limit the results based on the number of edits.
    ///
    /// This *should* be None if the [`Algorithm`] is not an edit distance.
    pub max_edits: Option<i32>,
    /// The *minimum* similarity score required.
    ///
    /// This **can** be None if the user does not want to limit the results based on the similarity score.
    pub similarity_threshold: Option<f64>,
    /// The target search words in the format of a vector of strings.
    pub targets: Vec<String>,
}
253
254impl SimpleSearch {
255    /// Create a new SimpleSearch struct.
256    pub fn new(
257        algorithm: Algorithm,
258        distance: fn(&str, &str) -> f64,
259        max_edits: Option<i32>,
260        similarity_threshold: Option<f64>,
261        targets: &[String],
262    ) -> SimpleSearch {
263        SimpleSearch {
264            algorithm,
265            distance,
266            max_edits,
267            similarity_threshold,
268            targets: targets.to_vec(),
269        }
270    }
271
272    fn manage_state(
273        &self,
274        state: &mut Option<HashMap<(String, String), f64>>,
275        word1: &str,
276        word2: &str,
277    ) -> f64 {
278        if let Some(state) = state {
279            *state
280                .entry((word1.to_string(), word2.to_string()))
281                .or_insert_with(|| (self.distance)(word1, word2))
282        } else {
283            (self.distance)(word1, word2)
284        }
285    }
286}
287
/// A single result returned by [`Search::scan()`].
///
/// This is used for the CLI and the web API.
///
/// The variant corresponds to the type of search run (Simple/Drug).
///
/// TODO: Add examples
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SearchOutput {
    /// A match produced by a [`SimpleSearch`].
    SimpleResult(SimpleResult),
    /// A match produced by a [`DrugSearch`].
    DrugResult(DrugResult),
}
302
/// Common interface of the searchers ([`SimpleSearch`] and [`DrugSearch`]).
pub trait Search {
    /// Scan the data for matches.
    ///
    /// * `text` – the text to scan.
    /// * `record` – optional record identifier; the implementations in this
    ///   file copy it into every result they produce.
    /// * `state` – optional cache of previously computed distances keyed by a
    ///   word pair; pass `&mut None` to disable caching.
    fn scan(
        &self,
        text: &str,
        record: Option<String>,
        state: &mut Option<HashMap<(String, String), f64>>,
    ) -> Vec<SearchOutput>;
}
313
314impl Search for SimpleSearch {
315    /// Scanning function to find matches.
316    ///
317    /// Searches the input text for the target words. This also does some pre-processing to remove
318    /// punctuation and other non-alphanumeric characters as well as upper-casing the input text.
319    ///
320    /// The search will be limited by the number of edits and/or similarity threshold (if) provided in the [`SimpleSearch`] struct.
321    ///
322    /// The results will be returned as a vector of [`SimpleResult`] structs.
323    ///
324    /// # Examples
325    /// TODO: Add examples
326    /// ```rust
327    /// let search = SimpleSearch::new(Algorithm::Levenshtein, levenshtein, None, None, &["hello", "world"]);
328    /// let results = search.scan("hello world", None);
329    /// ```
330    ///
331    fn scan(
332        &self,
333        text: &str,
334        record: Option<String>,
335        state: &mut Option<HashMap<(String, String), f64>>,
336    ) -> Vec<SearchOutput> {
337        let clean = text
338            .replace(&['(', ')', ',', '\"', '.', ';', ':', ']', '['][..], " ")
339            .to_uppercase();
340        let words = clean.split_whitespace();
341        let mut results: Vec<SimpleResult> = Vec::new();
342        for word in words {
343            for target in &self.targets {
344                let mut word_pair = vec![word.trim().to_uppercase(), target.trim().to_uppercase()];
345                word_pair.sort();
346                let d = self.manage_state(state, &word_pair[0], &word_pair[1]);
347                let res = SimpleResult {
348                    record_id: record.clone(),
349                    search_term: target.to_string(),
350                    matched_term: word.to_string(),
351                    algorithm: self.algorithm,
352                    edits: if self.algorithm.is_edits() {
353                        Some(d as i32)
354                    } else {
355                        None
356                    },
357                    similarity: if self.algorithm.is_edits() {
358                        1.0 - (d / (target.chars().count().max(word.chars().count()) as f64))
359                    } else {
360                        d
361                    },
362                };
363                results.push(res);
364            }
365        }
366        if let Some(me) = self.max_edits {
367            // filter by edits
368            results
369                .into_iter()
370                .filter(|x| x.edits.expect("result did not have edits") <= me)
371                .map(SearchOutput::SimpleResult)
372                .collect::<Vec<SearchOutput>>()
373        } else if let Some(thresh) = self.similarity_threshold {
374            // filter by similarity
375            results
376                .into_iter()
377                .filter(|x| x.similarity >= thresh)
378                .map(SearchOutput::SimpleResult)
379                .collect::<Vec<SearchOutput>>()
380        } else {
381            // return all
382            results
383                .into_iter()
384                .map(SearchOutput::SimpleResult)
385                .collect()
386        }
387    }
388}
389
/// A struct to hold data regarding a specific Drug.
///
/// This is used for the CLI and the web API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Drug {
    /// The name of the drug.
    ///
    /// [`DrugSearch`] splits this on `/` and matches each part separately.
    pub name: String,
    /// The drug's RxNorm ID (RX_CUI).
    pub rx_id: String,
    /// The drug's RxClass ID.
    pub class_id: String,
}
402
/// A struct to hold search information for a Drug Search.
///
/// Drug Search focuses on comparing drug names to target text.
///
/// Drug Search uses [`DrugSearch`] and [`DrugResult`] to hold the input and output data.
pub struct DrugSearch {
    /// The [`Algorithm`] to use.
    pub algorithm: Algorithm,
    /// The distance function to use, based on the [`Algorithm`] selected
    /// (see [`initialize_distance()`]).
    pub distance: fn(&str, &str) -> f64,
    /// The **maximum** number of edits allowed.
    ///
    /// This *can* be None if the user does not want to limit the results based on the number of edits.
    ///
    /// This *should* be None if the [`Algorithm`] is not an edit distance.
    pub max_edits: Option<i32>,
    /// The *minimum* similarity score required.
    ///
    /// This *can* be None if the user does not want to limit the results based on the similarity score.
    pub similarity_threshold: Option<f64>,
    /// The target search words in the format of a vector of [`Drug`]s.
    pub targets: Vec<Drug>,
}
424
425impl DrugSearch {
426    /// Create a new DrugSearch struct.
427    pub fn new(
428        algorithm: Algorithm,
429        distance: fn(&str, &str) -> f64,
430        max_edits: Option<i32>,
431        similarity_threshold: Option<f64>,
432        targets: &[Drug],
433    ) -> DrugSearch {
434        DrugSearch {
435            algorithm,
436            distance,
437            max_edits,
438            similarity_threshold,
439            targets: targets.to_vec(),
440        }
441    }
442    fn manage_state(
443        &self,
444        state: &mut Option<HashMap<(String, String), f64>>,
445        word1: &str,
446        word2: &str,
447    ) -> f64 {
448        if let Some(state) = state {
449            *state
450                .entry((word1.to_string(), word2.to_string()))
451                .or_insert_with(|| (self.distance)(word1, word2))
452        } else {
453            (self.distance)(word1, word2)
454        }
455    }
456}
457
/// One match produced by scanning text with a [`DrugSearch`].
///
/// Drug Search focuses on comparing drug names to target text.
///
/// Drug Search uses [`DrugSearch`] and [`DrugResult`] to hold the input and output data.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct DrugResult {
    /// The ID of the record being searched, if one was supplied to the scan.
    pub record_id: Option<String>,
    /// The algorithm used to calculate the score.
    pub algorithm: Algorithm,
    /// The number of edits between the compared words.
    ///
    /// `None` when [`Algorithm::is_edits()`] returns false.
    pub edits: Option<i32>,
    /// The similarity score between the compared words.
    pub similarity: f64,
    /// The word from the scanned text that was compared against the drug name.
    pub matched_term: String,
    /// The drug whose name (or one `/`-separated part of it) was compared.
    pub drug: Drug,
}
472
473impl Search for DrugSearch {
474    /// Scanning function to find matches.
475    ///
476    /// Searches the input text for the target drug names. This also does some pre-processing to remove
477    /// punctuation and other non-alphanumeric characters as well as upper-casing the input text.
478    ///
479    /// The search will be limited by the number of edits and/or similarity threshold (if) provided in the [`DrugSearch`] struct.
480    ///
481    /// The results will be returned as a vector of [`DrugResult`] structs.
482    ///
483    /// # Examples
484    /// TODO: Add examples
485    /// ```rust
486    /// let search = DrugSearch::new(Algorithm::Levenshtein, levenshtein, None, None, &["hello", "world"]);
487    /// let results = search.scan("hello world", None);
488    /// ```
489    ///
490    fn scan(
491        &self,
492        text: &str,
493        record: Option<String>,
494        state: &mut Option<HashMap<(String, String), f64>>,
495    ) -> Vec<SearchOutput> {
496        let clean = text
497            .replace(&['(', ')', ',', '\"', '.', ';', ':'][..], " ")
498            .to_uppercase();
499        let words = clean.split_whitespace();
500        let mut results: Vec<DrugResult> = Vec::new();
501        for word in words {
502            for target in &self.targets {
503                for t in target.name.split('/') {
504                    let mut word_pair = vec![word.trim().to_uppercase(), t.trim().to_uppercase()];
505                    word_pair.sort();
506                    let d = self.manage_state(state, &word_pair[0], &word_pair[1]);
507                    let res = DrugResult {
508                        record_id: record.clone(),
509                        matched_term: word.to_string(),
510                        algorithm: self.algorithm,
511                        edits: if self.algorithm.is_edits() {
512                            Some(d as i32)
513                        } else {
514                            None
515                        },
516                        similarity: if self.algorithm.is_edits() {
517                            1.0 - (d / (t.chars().count().max(word.chars().count()) as f64))
518                        } else {
519                            d
520                        },
521                        drug: target.to_owned(),
522                    };
523                    results.push(res);
524                }
525            }
526        }
527        if let Some(me) = self.max_edits {
528            // filter by edits
529            results
530                .into_iter()
531                .filter(|x| x.edits.expect("result did not have edits") <= me)
532                .map(SearchOutput::DrugResult)
533                .collect::<Vec<SearchOutput>>()
534        } else if let Some(thresh) = self.similarity_threshold {
535            // filter by similarity
536            results
537                .into_iter()
538                .filter(|x| x.similarity >= thresh)
539                .map(SearchOutput::DrugResult)
540                .collect::<Vec<SearchOutput>>()
541        } else {
542            // return all
543            results.into_iter().map(SearchOutput::DrugResult).collect()
544        }
545    }
546}
547
548/// A utility function to initialize the correct Searcher (Drug or Simple) based on user provided data.
549///
550/// Returns a Box<dyn Search> that will need to be unboxed.
551pub fn initialize_searcher(
552    algorithm: Algorithm,
553    distance: fn(&str, &str) -> f64,
554    max_edits: Option<i32>,
555    similarity_threshold: Option<f64>,
556    search_words: Option<&[String]>,
557    drug_list: Option<Vec<Drug>>,
558) -> Box<dyn Search> {
559    if let Some(drugs) = drug_list {
560        Box::new(DrugSearch::new(
561            algorithm,
562            distance,
563            max_edits,
564            similarity_threshold,
565            drugs.as_ref(),
566        ))
567    } else {
568        Box::new(SimpleSearch::new(
569            algorithm,
570            distance,
571            max_edits,
572            similarity_threshold,
573            search_words.unwrap_or_default(),
574        ))
575    }
576}
577
578/// A function to get some nice stats about the drugs in the list.
579pub fn analyze(
580    data: Vec<SearchOutput>,
581    total_targets: i32,
582    total_records: i32,
583    is_drug: bool,
584    has_id: bool,
585) -> Result<Vec<String>> {
586    let mut results: Vec<String> = Vec::new();
587    if data.is_empty() {
588        results.push("Unable to analyze, no matches found.".to_string());
589        return Ok(results);
590    }
591    if is_drug {
592        if has_id {
593            let mut found_targets: Vec<String> = Vec::new();
594            let mut found_ids: Vec<String> = Vec::new();
595            for r in data {
596                if let SearchOutput::DrugResult(drug) = r {
597                    found_targets.push(drug.drug.name.clone());
598                    found_ids.push(
599                        drug.record_id
600                            .as_ref()
601                            .expect("could not reference record id")
602                            .clone(),
603                    );
604                }
605            }
606            let unique_records = found_ids.clone().into_iter().collect::<HashSet<_>>();
607            results.push(format!(
608                "Found drugs in {} of {} records (~{:.2}%).",
609                unique_records.len(),
610                total_records,
611                100.0 * unique_records.len() as f64 / total_targets as f64
612            ));
613            let counts = found_ids.into_iter().counts();
614            let key_with_max_value = counts
615                .iter()
616                .max_by_key(|entry| entry.1)
617                .expect("could not find max");
618            results.push(format!(
619                "Most common record: {} (detected {} drugs)",
620                key_with_max_value.0, key_with_max_value.1
621            ));
622            let unique_targets = found_targets
623                .clone()
624                .into_iter()
625                .unique()
626                .collect::<HashSet<_>>();
627            results.push(format!(
628                "Found {} of {} drugs (~{:.2}%).",
629                unique_targets.len(),
630                total_targets,
631                100.0 * unique_targets.len() as f64 / total_targets as f64
632            ));
633            let counts = found_targets.into_iter().counts();
634            let key_with_max_value = counts
635                .iter()
636                .max_by_key(|entry| entry.1)
637                .expect("could not find max");
638            results.push(format!(
639                "The most common drug is {} with {} detections.",
640                key_with_max_value.0, key_with_max_value.1
641            ));
642        } else {
643            let mut found_targets: Vec<String> = Vec::new();
644            results.push("No record ID flag provided.".to_string());
645            for r in data {
646                if let SearchOutput::DrugResult(drug) = r {
647                    found_targets.push(drug.drug.name.clone());
648                }
649            }
650            let unique_targets = found_targets.into_iter().unique().collect::<HashSet<_>>();
651            results.push(format!(
652                "Found {} of {} drugs (~{:.2}%).",
653                unique_targets.len(),
654                total_targets,
655                100.0 * unique_targets.len() as f64 / total_targets as f64
656            ));
657            let counts = unique_targets.into_iter().counts();
658            let key_with_max_value = counts
659                .iter()
660                .max_by_key(|entry| entry.1)
661                .expect("could not find max");
662            results.push(format!(
663                "The most common drug is {} with {} detections.",
664                key_with_max_value.0, key_with_max_value.1
665            ));
666        }
667    } else if has_id {
668        let mut found_targets: Vec<String> = Vec::new();
669        let mut found_ids: Vec<String> = Vec::new();
670        for r in data {
671            if let SearchOutput::SimpleResult(simple) = r {
672                found_targets.push(simple.search_term.clone());
673                found_ids.push(
674                    simple
675                        .record_id
676                        .as_ref()
677                        .expect("could not reference record id")
678                        .clone(),
679                );
680            }
681        }
682        let unique_records = found_ids.clone().into_iter().collect::<HashSet<_>>();
683        results.push(format!(
684            "Found targets in {} of {} records (~{:.2}%).",
685            unique_records.len(),
686            total_records,
687            100.0 * unique_records.len() as f64 / total_records as f64,
688        ));
689        let counts = found_ids.clone().into_iter().counts();
690        let key_with_max_value = counts
691            .iter()
692            .max_by_key(|(_, v)| *v)
693            .expect("could not find max");
694        results.push(format!(
695            "Most common record: {} (detected {} targets)",
696            key_with_max_value.0, key_with_max_value.1
697        ));
698        let unique_targets = found_targets
699            .clone()
700            .into_iter()
701            .unique()
702            .collect::<HashSet<_>>();
703        results.push(format!(
704            "Found {} of {} targets (~{:.2}%).",
705            unique_targets.len(),
706            total_targets,
707            100.0 * unique_targets.len() as f64 / total_targets as f64
708        ));
709        let counts = found_targets.into_iter().counts();
710        let key_with_max_value = counts
711            .iter()
712            .max_by_key(|(_, v)| *v)
713            .expect("could not find max");
714        results.push(format!(
715            "The most common target is {} with {} detections.",
716            key_with_max_value.0, key_with_max_value.1
717        ));
718    } else {
719        let mut found_targets: Vec<String> = Vec::new();
720        results.push("No record ID flag provided.".to_string());
721        for r in data {
722            if let SearchOutput::SimpleResult(simple) = r {
723                found_targets.push(simple.search_term.clone());
724            }
725        }
726        let unique_targets = found_targets
727            .clone()
728            .into_iter()
729            .unique()
730            .collect::<HashSet<_>>();
731        results.push(format!(
732            "Found {} of {} targets (~{:.2}%).",
733            unique_targets.len(),
734            total_targets,
735            100.0 * unique_targets.len() as f64 / total_targets as f64
736        ));
737        let counts = found_targets.into_iter().counts();
738        let key_with_max_value = counts
739            .iter()
740            .max_by_key(|(_, v)| *v)
741            .expect("could not find max");
742        results.push(format!(
743            "The most common target is {} with {} detections.",
744            key_with_max_value.0, key_with_max_value.1
745        ));
746    }
747    Ok(results)
748}
749
// test section
#[cfg(test)]
mod tests {
    use super::*;

    /// Words wrapped in parentheses and followed by commas must still match.
    #[test]
    fn test_parens() {
        let targets = ["Fentanyl".to_uppercase(), "cocaine".to_uppercase()];
        let search = SimpleSearch::new(Algorithm::LEVENSHTEIN, my_leven, Some(1), None, &targets);
        let test_str = "Mixed Drug Toxicity(Fentanyl, Cocaine, Xylazine and Gabapentin)";
        let results = search.scan(test_str, None, &mut None);
        println!("{:?}", results);
        assert_eq!(results.len(), 2);
    }
}
769}
drug_extraction_core/lib.rs

drug_extraction_core/
lib.rs