rustwtxt/
parse.rs

1//! Lower-level parsing functions for when you don't want to use
2//! the provided `Twtxt` and `Tweet` objects.
3
4use std::collections::BTreeMap;
5
6use regex::Regex;
7
8type TwtxtErr<T> = std::result::Result<T, ErrorKind>;
9
/// The ways a lookup in this module can fail.
///
/// The type is a plain field-less enum, so it is cheap to copy and can be
/// compared directly (`err == ErrorKind::Keyword`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorKind {
    /// The input contains neither the `== Metadata ==` marker nor the
    /// requested keyword.
    Metadata,
    /// The requested keyword has no `keyword = value` entry in the input.
    Keyword,
    /// A regular expression could not be compiled.
    Regex,
}
16
17impl std::fmt::Display for ErrorKind {
18    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19        let kind = match self {
20            ErrorKind::Metadata => "Metadata",
21            ErrorKind::Keyword => "Keyword",
22            ErrorKind::Regex => "Regex",
23        };
24        write!(f, "{}", kind)
25    }
26}
27
// Nothing is overridden here: the trait's default method implementations are
// used. The `Debug` and `Display` impls the trait requires are the derived
// `Debug` on the enum and the hand-written `Display` impl above.
impl std::error::Error for ErrorKind {}
29
30/// This parses out the specified information in the `== Metadata ==` section of
31/// a given `twtxt.txt` file.
32///
33/// # Examples
34/// ```
35/// # use rustwtxt;
36/// # use rustwtxt::parse;
37///
38/// let twtxt = if let Ok(val) = rustwtxt::pull_twtxt("https://example.org/twtxt.txt") {
39///     val
40/// } else {
41///     String::new()
42/// };
43///
44/// let out = parse::metadata(&twtxt, "nick");
45/// ```
46pub fn metadata(twtxt: &str, keyword: &str) -> TwtxtErr<String> {
47    if !twtxt.contains("== Metadata ==") && !twtxt.contains(keyword) {
48        return Err(ErrorKind::Metadata);
49    }
50
51    let regex_string = format!("{} = (.*)", keyword);
52
53    let regex = if let Ok(val) = Regex::new(&regex_string) {
54        val
55    } else {
56        return Err(ErrorKind::Regex);
57    };
58
59    let matched = if let Some(val) = regex.captures(twtxt) {
60        val
61    } else {
62        return Err(ErrorKind::Keyword);
63    };
64
65    let keyword_match = if let Some(val) = matched.get(1) {
66        val.as_str()
67    } else {
68        return Err(ErrorKind::Keyword);
69    };
70
71    Ok(keyword_match.to_string())
72}
73
/// Splits the contents of a `twtxt.txt` file into its individual tweets.
///
/// Every line that does not start with `#`, is at least two bytes long and
/// contains a tab is split at its *first* tab: the part before it becomes the
/// key (the timestamp) and everything after it becomes the value (the status
/// text, including any further tabs). Both `\n` and `\r\n` line endings are
/// accepted. If a timestamp occurs more than once, the last line wins.
///
/// Returns `None` when no line yields a status.
pub fn statuses(twtxt: &str) -> Option<BTreeMap<String, String>> {
    let map: BTreeMap<String, String> = twtxt
        .lines()
        .filter(|line| !line.starts_with('#') && line.len() >= 2)
        .filter_map(|line| {
            // `splitn(2, ..)` keeps tabs inside the status text intact.
            let mut parts = line.splitn(2, '\t');
            match (parts.next(), parts.next()) {
                (Some(stamp), Some(text)) => Some((stamp.to_string(), text.to_string())),
                // No tab on this line, so it is not a status.
                _ => None,
            }
        })
        .collect();

    if map.is_empty() {
        None
    } else {
        Some(map)
    }
}
95
96/// Parse the mentions out of a `twtxt.txt` file. Returns a
97/// `std::collections::BTreeMap<String, String>` with the
98/// timestamp of the tweet as the key and the mention as
99/// the associated value.
100pub fn mentions(twtxt: &str) -> Option<BTreeMap<String, String>> {
101    let statuses = if let Some(val) = statuses(&twtxt) {
102        val
103    } else {
104        return None;
105    };
106    let mut map = BTreeMap::new();
107    statuses.iter().for_each(|(k, v)| {
108        if !v.contains("@<") {
109            return;
110        }
111
112        let regex = Regex::new(r"[@<].*[>]+").unwrap();
113        let out = if let Some(val) = regex.captures(v) {
114            match val.get(0) {
115                Some(n) => n.as_str(),
116                _ => return,
117            }
118        } else {
119            return;
120        };
121
122        let mention = out.to_string();
123        map.insert(k.to_string(), mention);
124    });
125
126    if map.is_empty() {
127        return None;
128    }
129    Some(map)
130}
131
/// Takes a mention in the form of `@<nick https://example.com/twtxt.txt>`
/// and reduces it to just the nickname.
///
/// `line` may contain other text around the mention. The nickname is the
/// text between the first `@<` that is closed by a `>` on the same line and
/// the first space (or the `>` itself when the mention has no URL part).
///
/// Returns `None` when `line` contains no such mention. This function never
/// panics, whatever the input.
///
/// # Examples
/// ```
/// # use rustwtxt;
/// # use rustwtxt::parse;
/// let status = "2019.09.09\tHey there, @<nickname https://example.com/twtxt.txt!>";
/// let mention = parse::mention_to_nickname(status).unwrap();
/// assert_eq!(mention, "nickname");
/// ```
pub fn mention_to_nickname(line: &str) -> Option<String> {
    line.match_indices("@<").find_map(|(at, opener)| {
        // Only the remainder of the line the opener sits on may close it.
        let tail = line[at + opener.len()..].split('\n').next()?;
        let close = tail.find('>')?;
        // The nickname is the first space-separated token inside `@<...>`.
        tail[..close].split(' ').next().map(str::to_string)
    })
}
158
159/// Parses out `#tags` from each tweet, returning a `std::collections::BTreeMap<String, String>`
160/// with the timestamp as the key, and the tag as the value.
161pub fn tags(twtxt: &str) -> Option<BTreeMap<String, String>> {
162    let statuses = if let Some(val) = statuses(&twtxt) {
163        val
164    } else {
165        return None;
166    };
167    let mut map = BTreeMap::new();
168    statuses.iter().for_each(|(k, v)| {
169        if !v.contains('#') {
170            return;
171        }
172
173        let regex = Regex::new(r"(^|\s)#[^\s]+").unwrap();
174        let tag: Vec<(String, String)> = regex
175            .find_iter(v)
176            .map(|ding| (k.clone(), ding.as_str().to_string()))
177            .collect();
178
179        let tags: Vec<(String, String)> = tag
180            .iter()
181            .map(|(k, v)| {
182                let v = v
183                    .chars()
184                    .map(|c| {
185                        if c.is_whitespace() {
186                            return "".into();
187                        }
188                        c.to_string()
189                    })
190                    .collect::<String>();
191                (k.clone(), v)
192            })
193            .collect();
194
195        let mut tag_group = String::new();
196        tags.iter().for_each(|(_, v)| {
197            tag_group.push_str(v);
198            tag_group.push_str(" ");
199        });
200
201        map.insert(k.to_string(), tag_group[..tag_group.len() - 1].to_string());
202    });
203
204    if map.is_empty() {
205        return None;
206    }
207    Some(map)
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    // Live feed fetched by the tests that work on real-world data.
    const TEST_URL: &str = "https://gbmor.dev/twtxt.txt";

    /// Downloads the reference feed; the calling test fails if that is not possible.
    fn fetch_feed() -> String {
        crate::pull_twtxt(TEST_URL).unwrap()
    }

    #[test]
    fn turn_mentions_to_nick() {
        let line = "2019.09.09\tHey @<gbmor https://gbmor.dev/twtxt.txt>!";
        let nick = mention_to_nickname(line);
        assert_eq!(Some("gbmor".to_string()), nick);
    }

    #[test]
    fn get_tags() {
        // (input feed, expected tag string for the "test" timestamp)
        let cases = [
            ("test\t#test", "#test"),
            ("test\tsome other #test here", "#test"),
            ("test\tsome other #test", "#test"),
            ("test\tsome #test goes #here", "#test #here"),
        ];

        for &(feed, expected) in cases.iter() {
            let tag_map = tags(feed).unwrap();
            assert_eq!(expected, tag_map["test"]);
        }
    }

    // The three `metadata` tests below expect the lookup to return an error,
    // which makes `unwrap` panic.

    #[test]
    #[should_panic]
    fn bad_regex() {
        let keyword = "<#*#@(&$(%)@$)>";
        metadata("SOME DATA", keyword).unwrap();
    }

    #[test]
    #[should_panic]
    fn no_matches() {
        let feed = "SOME = DATA";
        metadata(feed, "nick").unwrap();
    }

    #[test]
    #[should_panic]
    fn parse_bad_twtxt() {
        let feed = "SOMETHING GOES HERE";
        metadata(feed, "url").unwrap();
    }

    #[test]
    #[should_panic]
    fn get_bad_statuses() {
        // An empty feed has no statuses, so there is nothing to unwrap.
        statuses("").unwrap();
    }

    #[test]
    fn get_mentions() {
        let feed = fetch_feed();
        let found = mentions(&feed).unwrap();
        assert!(found.len() > 1);
    }

    #[test]
    fn get_username() {
        let feed = fetch_feed();
        let nick = metadata(&feed, "nick").unwrap();
        assert_eq!("gbmor", nick);
    }

    #[test]
    fn get_url() {
        let feed = fetch_feed();
        let url = metadata(&feed, "url").unwrap();
        assert_eq!(TEST_URL, url);
    }

    #[test]
    fn get_status_map() {
        let feed = fetch_feed();
        let status_map = statuses(&feed).unwrap();
        assert!(status_map.len() > 1);
    }
}