// rustwtxt/lib.rs

1//! # rustwtxt
2//!
3//! This is a library intended to make working with `twtxt` timelines
4//! a bit easier.
5
6use std::collections::BTreeMap;
7use std::str::FromStr;
8
9use failure::format_err;
10use regex::Regex;
11use ureq;
12
13pub mod parse;
14
15type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
16pub type TweetMap = std::collections::BTreeMap<String, Tweet>;
17
18/// Holds tweets and metadata from a single `twtxt.txt` file.
19#[derive(Debug, Clone, Eq, PartialEq)]
20pub struct Twtxt {
21    nickname: String,
22    url: String,
23    tweets: TweetMap,
24}
25
26impl Twtxt {
27    /// Returns the nickname associated with the `twtxt.txt` file.
28    pub fn nick(&self) -> &str {
29        &self.nickname
30    }
31
32    /// Returns the URL associated with the `twtxt.txt` file.
33    pub fn url(&self) -> &str {
34        &self.url
35    }
36
37    /// Returns a specific tweet by the timestamp key.
38    pub fn tweet(&self, datestamp: &str) -> Option<&Tweet> {
39        if self.tweets.contains_key(datestamp) {
40            Some(&self.tweets[datestamp])
41        } else {
42            None
43        }
44    }
45
46    /// Returns all tweets as a `TweetMap`, a thin wrapper around a `BTreeMap`.
47    /// The tweets will be date-sorted.
48    pub fn tweets(&self) -> &TweetMap {
49        &self.tweets
50    }
51
52    /// Parse a remote `twtxt.txt` file into a `Twtxt` structure.
53    pub fn from(url: &str) -> Option<Twtxt> {
54        let twtxt = if let Ok(val) = pull_twtxt(&url) {
55            val
56        } else {
57            return None;
58        };
59
60        let url = url.to_owned();
61
62        let nickname = if let Ok(val) = parse::metadata(&twtxt, "nick") {
63            val
64        } else {
65            return None;
66        };
67
68        let mut tweets = BTreeMap::new();
69        twtxt
70            .split('\n')
71            .collect::<Vec<&str>>()
72            .iter()
73            .for_each(|line| {
74                if line.starts_with('#') || line == &"" || !line.contains('\t') {
75                    return;
76                }
77                let tweet = if let Ok(val) = Tweet::from_str(line) {
78                    val
79                } else {
80                    return;
81                };
82                tweets.insert(tweet.timestamp.clone(), tweet);
83            });
84
85        Some(Twtxt {
86            nickname,
87            url,
88            tweets,
89        })
90    }
91}
92
/// Holds a single tweet.
///
/// All fields are private; read them through the accessor methods on
/// this type.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Tweet {
    /// Timestamp field of the source line, stored verbatim as text.
    timestamp: String,
    /// Status text that follows the timestamp on the source line.
    body: String,
    /// Mentions extracted from `body`.
    mentions: Vec<String>,
    /// Hashtags extracted from `body`.
    tags: Vec<String>,
}

102impl Tweet {
103    /// Returns the timestamp for a given tweet.
104    pub fn timestamp(&self) -> &str {
105        &self.timestamp
106    }
107
108    /// Returns the body of the tweet.
109    pub fn body(&self) -> &str {
110        &self.body
111    }
112
113    /// Any mentions within the body of the tweet have been parsed out
114    /// and are retrievable through this method.
115    pub fn mentions(&self) -> Vec<String> {
116        self.mentions.clone()
117    }
118
119    /// Any tags within the body of the tweet have been parsed out
120    /// and are retrievable through this method.
121    pub fn tags(&self) -> Vec<String> {
122        self.tags.clone()
123    }
124}
125
126impl std::str::FromStr for Tweet {
127    type Err = Box<dyn std::error::Error>;
128
129    /// Takes a properly-formatted `twtxt` tweet and parses it
130    /// into a `Tweet` structure.
131    fn from_str(tweet: &str) -> Result<Tweet> {
132        let split = tweet.split('\t').collect::<Vec<&str>>();
133        let timestamp = split[0].to_string();
134        let body = split[1].to_string();
135
136        let mentions_regex = Regex::new(r"[@<].*[>]+")?;
137        let tags_regex = Regex::new(r"(^|\s)#[^\s]+")?;
138
139        let mentions = mentions_regex
140            .find_iter(&body)
141            .map(|ding| ding.as_str().to_string())
142            .collect::<Vec<String>>();
143
144        let tags = tags_regex
145            .find_iter(&body)
146            .map(|ding| {
147                let tmp = ding.as_str();
148                let tmp = tmp.split(' ').collect::<Vec<&str>>();
149                if tmp[0] == "" && tmp.len() > 1 {
150                    return tmp[1].to_string();
151                }
152                tmp[0].to_string()
153            })
154            .collect::<Vec<String>>();
155
156        Ok(Tweet {
157            timestamp,
158            body,
159            mentions,
160            tags,
161        })
162    }
163}
164
165/// Pulls the target twtxt.txt file from the specified URL.
166///
167/// # Examples
168/// ```
169/// # use rustwtxt;
170/// let out = if let Ok(data) = rustwtxt::pull_twtxt("https://some-url-here.ext/twtxt.txt") {
171///               data
172///           } else {
173///               String::new()
174///           };
175/// ```
176pub fn pull_twtxt(url: &str) -> Result<String> {
177    let resp = ureq::get(&url).timeout_connect(5000).call();
178    if resp.error() {
179        return Err(Box::new(failure::Error::compat(format_err!(
180            "{} :: {}",
181            resp.status(),
182            &url
183        ))));
184    }
185
186    if let Ok(val) = resp.into_string() {
187        return Ok(val);
188    }
189    Err(Box::new(failure::Error::compat(format_err!(
190        "{} :: Internal Error",
191        &url
192    ))))
193}
194
/// Wrapper to apply a function to each line of a `twtxt.txt` file,
/// returning the resulting lines as a `Vec<String>`
///
/// `f` is called once per line, in order, with the line terminator
/// (`\n` or `\r\n`) already removed. Any callable works: a plain function,
/// a function pointer, or a closure that captures its environment.
///
/// # Examples
/// ```
/// # use rustwtxt;
/// let input = "test\ntest";
/// let output = rustwtxt::mutate(input, |line| {
///         line.chars()
///             .map(|c| c.to_uppercase().to_string())
///             .collect::<String>()
///     });
/// assert_eq!("TEST", output[0]);
/// ```
pub fn mutate(twtxt: &str, f: impl FnMut(&str) -> String) -> Vec<String> {
    // Iterate over the borrowed input directly; no owned copy is needed.
    twtxt.lines().map(f).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: every test that uses this URL needs network access and the
    // remote feed to be reachable.
    const TEST_URL: &str = "https://gbmor.dev/twtxt.txt";

    /// Fetches the live feed and checks the accessors on both structs.
    #[test]
    fn the_structs() {
        let twtxt = Twtxt::from(TEST_URL).unwrap();
        assert_eq!("gbmor", twtxt.nick());
        assert_eq!(TEST_URL, twtxt.url());
        assert!(twtxt.tweets().len() > 1);

        let (_, tweet) = twtxt.tweets().iter().next().unwrap();
        assert!(tweet.body().len() > 1);
        assert!(tweet.timestamp().len() > 1);
        assert!(tweet.tags().is_empty());
    }

    /// A URL that does not serve a usable `twtxt.txt` yields `None`.
    #[test]
    fn bad_twtxt_url() {
        assert!(Twtxt::from("https://example.com/twtxt.txt").is_none());
    }

    /// The fetched feed matches a hand-built value on nickname and URL.
    #[test]
    fn make_twtxt() {
        let rhs = Twtxt::from(TEST_URL).unwrap();
        let tweets = BTreeMap::new();
        let lhs = Twtxt {
            nickname: String::from("gbmor"),
            url: String::from("https://gbmor.dev/twtxt.txt"),
            tweets,
        };
        assert_eq!(lhs.nickname, rhs.nickname);
        assert_eq!(lhs.url, rhs.url);
        assert!(rhs.tweets.len() > 1);
    }

    #[test]
    fn test_mutate() {
        let input = "test";
        let rhs = mutate(input, |line| {
            line.chars()
                .map(|c| c.to_uppercase().to_string())
                .collect::<String>()
        });
        assert_eq!("TEST", rhs[0]);
    }

    /// Parsing a single well-formed line needs no network access.
    #[test]
    fn tweet_from_str_offline() {
        let tweet = "2019-10-01T12:00:00Z\thello @<bob https://example.com/twtxt.txt> #rust"
            .parse::<Tweet>()
            .unwrap();
        assert_eq!("2019-10-01T12:00:00Z", tweet.timestamp());
        assert_eq!(
            "hello @<bob https://example.com/twtxt.txt> #rust",
            tweet.body()
        );
        assert_eq!(
            vec!["@<bob https://example.com/twtxt.txt>"],
            tweet.mentions()
        );
        assert_eq!(vec!["#rust"], tweet.tags());
    }

    #[test]
    fn test_pull_twtxt() {
        let res = pull_twtxt(TEST_URL).unwrap();
        assert!(res.contains("gbmor"));
    }

    /// An unreachable host is reported as an `Err`, not a panic.
    #[test]
    fn test_bad_url() {
        assert!(
            pull_twtxt("https://example-some-fake-site-goes-here.com/some_fake_url.txt").is_err()
        );
    }
}