mollendorff_ref/
extract.rs1use regex::Regex;
4use std::collections::HashSet;
5
6#[must_use]
12pub fn extract_urls(content: &str) -> Vec<String> {
13 let re = Regex::new(r#"https?://[^\s\)>\]"'`]+"#).unwrap();
14
15 let mut seen = HashSet::new();
16 let mut urls = Vec::new();
17
18 for mat in re.find_iter(content) {
19 let url = mat.as_str();
20 let url = url.trim_end_matches([',', '.', ')', ']', ';', ':']);
22
23 if !seen.contains(url) {
24 seen.insert(url.to_string());
25 urls.push(url.to_string());
26 }
27 }
28
29 urls
30}
31
32#[must_use]
38pub fn extract_amounts(text: &str) -> Vec<AmountMatch> {
39 let re = Regex::new(r"\$([0-9,.]+)\s*(billion|million|B|M|K)?").unwrap();
40
41 re.captures_iter(text)
42 .take(10)
43 .map(|cap| AmountMatch {
44 value: cap[1].to_string(),
45 unit: cap.get(2).map(|m| m.as_str().to_string()),
46 raw: cap[0].to_string(),
47 })
48 .collect()
49}
50
51#[must_use]
57pub fn extract_percentages(text: &str) -> Vec<String> {
58 let re = Regex::new(r"([0-9,.]+)\s*%").unwrap();
59
60 re.find_iter(text)
61 .take(10)
62 .map(|m| m.as_str().to_string())
63 .collect()
64}
65
66#[derive(Debug, Clone, serde::Serialize)]
67pub struct AmountMatch {
68 pub value: String,
69 pub unit: Option<String>,
70 pub raw: String,
71}
72
#[cfg(test)]
mod tests {
    use super::*;

    /// URLs are found in prose and Markdown links, trailing punctuation is
    /// stripped, and results keep their order of appearance.
    #[test]
    fn test_extract_urls() {
        let content = r"
            Check out https://example.com and
            [link](https://foo.bar/path?q=1) for more.
            Also http://old.site.org.
        ";

        let urls = extract_urls(content);
        assert_eq!(
            urls,
            [
                "https://example.com",
                "https://foo.bar/path?q=1",
                "http://old.site.org",
            ]
        );
    }

    /// Repeated URLs are reported once.
    #[test]
    fn test_extract_urls_dedup() {
        let content = "https://dup.com https://dup.com https://dup.com";
        let urls = extract_urls(content);
        assert_eq!(urls, ["https://dup.com"]);
    }

    /// Amounts with a spelled-out unit and with a single-letter unit.
    #[test]
    fn test_extract_amounts() {
        let text = "The market is worth $33 billion and growing to $48.2M";
        let amounts = extract_amounts(text);
        assert_eq!(amounts.len(), 2);
        assert_eq!(amounts[0].value, "33");
        assert_eq!(amounts[0].unit, Some("billion".to_string()));
        assert_eq!(amounts[0].raw, "$33 billion");
        assert_eq!(amounts[1].value, "48.2");
        assert_eq!(amounts[1].unit, Some("M".to_string()));
        assert_eq!(amounts[1].raw, "$48.2M");
    }

    /// An amount that is not followed by a known unit has `unit == None`.
    #[test]
    fn test_extract_amounts_without_unit() {
        let amounts = extract_amounts("Tickets cost $15 each");
        assert_eq!(amounts.len(), 1);
        assert_eq!(amounts[0].value, "15");
        assert_eq!(amounts[0].unit, None);
    }

    /// No more than ten amounts are returned per call.
    #[test]
    fn test_extract_amounts_is_capped() {
        let text = "$1 ".repeat(12);
        assert_eq!(extract_amounts(&text).len(), 10);
    }

    /// Percentages are returned verbatim, in order of appearance.
    #[test]
    fn test_extract_percentages() {
        let text = "Growth of 71% with 53% adoption rate";
        let pcts = extract_percentages(text);
        assert_eq!(pcts, ["71%", "53%"]);
    }

    /// No more than ten percentages are returned per call.
    #[test]
    fn test_extract_percentages_is_capped() {
        let text = "1% ".repeat(12);
        assert_eq!(extract_percentages(&text).len(), 10);
    }
}